pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60
  61    def __init__(self, samples, names, idl=None, **kwargs):
  62        """ Initialize Obs object.
  63
  64        Parameters
  65        ----------
  66        samples : list
  67            list of numpy arrays containing the Monte Carlo samples
  68        names : list
  69            list of strings labeling the individual samples
  70        idl : list, optional
  71            list of ranges or lists on which the samples are defined
  72        """
  73
  74        if kwargs.get("means") is None and len(samples):
  75            if len(samples) != len(names):
  76                raise ValueError('Length of samples and names incompatible.')
  77            if idl is not None:
  78                if len(idl) != len(names):
  79                    raise ValueError('Length of idl incompatible with samples and names.')
  80            name_length = len(names)
  81            if name_length > 1:
  82                if name_length != len(set(names)):
  83                    raise ValueError('Names are not unique.')
  84                if not all(isinstance(x, str) for x in names):
  85                    raise TypeError('All names have to be strings.')
  86            else:
  87                if not isinstance(names[0], str):
  88                    raise TypeError('All names have to be strings.')
  89            if min(len(x) for x in samples) <= 4:
  90                raise ValueError('Samples have to have at least 5 entries.')
  91
  92        self.names = sorted(names)
  93        self.shape = {}
  94        self.r_values = {}
  95        self.deltas = {}
  96        self._covobs = {}
  97
  98        self._value = 0
  99        self.N = 0
 100        self.idl = {}
 101        if idl is not None:
 102            for name, idx in sorted(zip(names, idl)):
 103                if isinstance(idx, range):
 104                    self.idl[name] = idx
 105                elif isinstance(idx, (list, np.ndarray)):
 106                    dc = np.unique(np.diff(idx))
 107                    if np.any(dc < 0):
 108                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 109                    if len(dc) == 1:
 110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 111                    else:
 112                        self.idl[name] = list(idx)
 113                else:
 114                    raise TypeError('incompatible type for idl[%s].' % (name))
 115        else:
 116            for name, sample in sorted(zip(names, samples)):
 117                self.idl[name] = range(1, len(sample) + 1)
 118
 119        if kwargs.get("means") is not None:
 120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 121                self.shape[name] = len(self.idl[name])
 122                self.N += self.shape[name]
 123                self.r_values[name] = mean
 124                self.deltas[name] = sample
 125        else:
 126            for name, sample in sorted(zip(names, samples)):
 127                self.shape[name] = len(self.idl[name])
 128                self.N += self.shape[name]
 129                if len(sample) != self.shape[name]:
 130                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 131                self.r_values[name] = np.mean(sample)
 132                self.deltas[name] = sample - self.r_values[name]
 133                self._value += self.shape[name] * self.r_values[name]
 134            self._value /= self.N
 135
 136        self._dvalue = 0.0
 137        self.ddvalue = 0.0
 138        self.reweighted = False
 139
 140        self.tag = None
 141
 142    @property
 143    def value(self):
 144        return self._value
 145
 146    @property
 147    def dvalue(self):
 148        return self._dvalue
 149
 150    @property
 151    def e_names(self):
 152        return sorted(set([o.split('|')[0] for o in self.names]))
 153
 154    @property
 155    def cov_names(self):
 156        return sorted(set([o for o in self.covobs.keys()]))
 157
 158    @property
 159    def mc_names(self):
 160        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 161
 162    @property
 163    def e_content(self):
 164        res = {}
 165        for e, e_name in enumerate(self.e_names):
 166            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 167            if e_name in self.names:
 168                res[e_name].append(e_name)
 169        return res
 170
 171    @property
 172    def covobs(self):
 173        return self._covobs
 174
 175    def gamma_method(self, **kwargs):
 176        """Estimate the error and related properties of the Obs.
 177
 178        Parameters
 179        ----------
 180        S : float
 181            specifies a custom value for the parameter S (default 2.0).
 182            If set to 0 it is assumed that the data exhibits no
 183            autocorrelation. In this case the error estimates coincides
 184            with the sample standard error.
 185        tau_exp : float
 186            positive value triggers the critical slowing down analysis
 187            (default 0.0).
 188        N_sigma : float
 189            number of standard deviations from zero until the tail is
 190            attached to the autocorrelation function (default 1).
 191        fft : bool
 192            determines whether the fft algorithm is used for the computation
 193            of the autocorrelation function (default True)
 194        """
 195
 196        e_content = self.e_content
 197        self.e_dvalue = {}
 198        self.e_ddvalue = {}
 199        self.e_tauint = {}
 200        self.e_dtauint = {}
 201        self.e_windowsize = {}
 202        self.e_n_tauint = {}
 203        self.e_n_dtauint = {}
 204        e_gamma = {}
 205        self.e_rho = {}
 206        self.e_drho = {}
 207        self._dvalue = 0
 208        self.ddvalue = 0
 209
 210        self.S = {}
 211        self.tau_exp = {}
 212        self.N_sigma = {}
 213
 214        if kwargs.get('fft') is False:
 215            fft = False
 216        else:
 217            fft = True
 218
 219        def _parse_kwarg(kwarg_name):
 220            if kwarg_name in kwargs:
 221                tmp = kwargs.get(kwarg_name)
 222                if isinstance(tmp, (int, float)):
 223                    if tmp < 0:
 224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 225                    for e, e_name in enumerate(self.e_names):
 226                        getattr(self, kwarg_name)[e_name] = tmp
 227                else:
 228                    raise TypeError(kwarg_name + ' is not in proper format.')
 229            else:
 230                for e, e_name in enumerate(self.e_names):
 231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 233                    else:
 234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 235
 236        _parse_kwarg('S')
 237        _parse_kwarg('tau_exp')
 238        _parse_kwarg('N_sigma')
 239
 240        for e, e_name in enumerate(self.mc_names):
 241            r_length = []
 242            for r_name in e_content[e_name]:
 243                if isinstance(self.idl[r_name], range):
 244                    r_length.append(len(self.idl[r_name]))
 245                else:
 246                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 247
 248            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 249            w_max = max(r_length) // 2
 250            e_gamma[e_name] = np.zeros(w_max)
 251            self.e_rho[e_name] = np.zeros(w_max)
 252            self.e_drho[e_name] = np.zeros(w_max)
 253
 254            for r_name in e_content[e_name]:
 255                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 256
 257            gamma_div = np.zeros(w_max)
 258            for r_name in e_content[e_name]:
 259                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 260            gamma_div[gamma_div < 1] = 1.0
 261            e_gamma[e_name] /= gamma_div[:w_max]
 262
 263            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 264                self.e_tauint[e_name] = 0.5
 265                self.e_dtauint[e_name] = 0.0
 266                self.e_dvalue[e_name] = 0.0
 267                self.e_ddvalue[e_name] = 0.0
 268                self.e_windowsize[e_name] = 0
 269                continue
 270
 271            gaps = []
 272            for r_name in e_content[e_name]:
 273                if isinstance(self.idl[r_name], range):
 274                    gaps.append(1)
 275                else:
 276                    gaps.append(np.min(np.diff(self.idl[r_name])))
 277
 278            if not np.all([gi == gaps[0] for gi in gaps]):
 279                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
 280            else:
 281                gapsize = gaps[0]
 282
 283            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 284            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 285            # Make sure no entry of tauint is smaller than 0.5
 286            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 287            # hep-lat/0306017 eq. (42)
 288            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
 289            self.e_n_dtauint[e_name][0] = 0.0
 290
 291            def _compute_drho(i):
 292                tmp = (self.e_rho[e_name][i + 1:w_max]
 293                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
 294                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 295                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 296                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 297
 298            if self.tau_exp[e_name] > 0:
 299                _compute_drho(gapsize)
 300                texp = self.tau_exp[e_name]
 301                # Critical slowing down analysis
 302                if w_max // 2 <= 1:
 303                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 304                for n in range(gapsize, w_max // 2, gapsize):
 305                    _compute_drho(n + gapsize)
 306                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 307                        # Bias correction hep-lat/0306017 eq. (49) included
 308                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 309                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 310                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 311                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 312                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 313                        self.e_windowsize[e_name] = n
 314                        break
 315            else:
 316                if self.S[e_name] == 0.0:
 317                    self.e_tauint[e_name] = 0.5
 318                    self.e_dtauint[e_name] = 0.0
 319                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 320                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 321                    self.e_windowsize[e_name] = 0
 322                else:
 323                    # Standard automatic windowing procedure
 324                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
 325                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 326                    for n in range(1, w_max // gapsize):
 327                        if g_w[n - 1] < 0 or n >= w_max // gapsize - 1:
 328                            _compute_drho(gapsize * n)
 329                            n *= gapsize
 330                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 331                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 332                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 333                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 334                            self.e_windowsize[e_name] = n
 335                            break
 336
 337            self._dvalue += self.e_dvalue[e_name] ** 2
 338            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 339
 340        for e_name in self.cov_names:
 341            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 342            self.e_ddvalue[e_name] = 0
 343            self._dvalue += self.e_dvalue[e_name]**2
 344
 345        self._dvalue = np.sqrt(self._dvalue)
 346        if self._dvalue == 0.0:
 347            self.ddvalue = 0.0
 348        else:
 349            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 350        return
 351
 352    gm = gamma_method
 353
 354    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 355        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 356           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 357
 358        Parameters
 359        ----------
 360        deltas : list
 361            List of fluctuations
 362        idx : list
 363            List or range of configurations on which the deltas are defined.
 364        shape : int
 365            Number of configurations in idx.
 366        w_max : int
 367            Upper bound for the summation window.
 368        fft : bool
 369            determines whether the fft algorithm is used for the computation
 370            of the autocorrelation function.
 371        """
 372        gamma = np.zeros(w_max)
 373        deltas = _expand_deltas(deltas, idx, shape)
 374        new_shape = len(deltas)
 375        if fft:
 376            max_gamma = min(new_shape, w_max)
 377            # The padding for the fft has to be even
 378            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 379            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 380        else:
 381            for n in range(w_max):
 382                if new_shape - n >= 0:
 383                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 384
 385        return gamma
 386
 387    def details(self, ens_content=True):
 388        """Output detailed properties of the Obs.
 389
 390        Parameters
 391        ----------
 392        ens_content : bool
 393            print details about the ensembles and replica if true.
 394        """
 395        if self.tag is not None:
 396            print("Description:", self.tag)
 397        if not hasattr(self, 'e_dvalue'):
 398            print('Result\t %3.8e' % (self.value))
 399        else:
 400            if self.value == 0.0:
 401                percentage = np.nan
 402            else:
 403                percentage = np.abs(self._dvalue / self.value) * 100
 404            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 405            if len(self.e_names) > 1:
 406                print(' Ensemble errors:')
 407            e_content = self.e_content
 408            for e_name in self.mc_names:
 409                if isinstance(self.idl[e_content[e_name][0]], range):
 410                    gap = self.idl[e_content[e_name][0]].step
 411                else:
 412                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 413
 414                if len(self.e_names) > 1:
 415                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 416                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 417                tau_string += f" in units of {gap} config"
 418                if gap > 1:
 419                    tau_string += "s"
 420                if self.tau_exp[e_name] > 0:
 421                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 422                else:
 423                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 424                print(tau_string)
 425            for e_name in self.cov_names:
 426                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 427        if ens_content is True:
 428            if len(self.e_names) == 1:
 429                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 430            else:
 431                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 432            my_string_list = []
 433            for key, value in sorted(self.e_content.items()):
 434                if key not in self.covobs:
 435                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 436                    if len(value) == 1:
 437                        my_string += f': {self.shape[value[0]]} configurations'
 438                        if isinstance(self.idl[value[0]], range):
 439                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 440                        else:
 441                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 442                    else:
 443                        sublist = []
 444                        for v in value:
 445                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 446                            my_substring += f': {self.shape[v]} configurations'
 447                            if isinstance(self.idl[v], range):
 448                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 449                            else:
 450                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 451                            sublist.append(my_substring)
 452
 453                        my_string += '\n' + '\n'.join(sublist)
 454                else:
 455                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 456                my_string_list.append(my_string)
 457            print('\n'.join(my_string_list))
 458
 459    def reweight(self, weight):
 460        """Reweight the obs with given rewighting factors.
 461
 462        Parameters
 463        ----------
 464        weight : Obs
 465            Reweighting factor. An Observable that has to be defined on a superset of the
 466            configurations in obs[i].idl for all i.
 467        all_configs : bool
 468            if True, the reweighted observables are normalized by the average of
 469            the reweighting factor on all configurations in weight.idl and not
 470            on the configurations in obs[i].idl. Default False.
 471        """
 472        return reweight(weight, [self])[0]
 473
 474    def is_zero_within_error(self, sigma=1):
 475        """Checks whether the observable is zero within 'sigma' standard errors.
 476
 477        Parameters
 478        ----------
 479        sigma : int
 480            Number of standard errors used for the check.
 481
 482        Works only properly when the gamma method was run.
 483        """
 484        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 485
 486    def is_zero(self, atol=1e-10):
 487        """Checks whether the observable is zero within a given tolerance.
 488
 489        Parameters
 490        ----------
 491        atol : float
 492            Absolute tolerance (for details see numpy documentation).
 493        """
 494        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 495
 496    def plot_tauint(self, save=None):
 497        """Plot integrated autocorrelation time for each ensemble.
 498
 499        Parameters
 500        ----------
 501        save : str
 502            saves the figure to a file named 'save' if.
 503        """
 504        if not hasattr(self, 'e_dvalue'):
 505            raise Exception('Run the gamma method first.')
 506
 507        for e, e_name in enumerate(self.mc_names):
 508            fig = plt.figure()
 509            plt.xlabel(r'$W$')
 510            plt.ylabel(r'$\tau_\mathrm{int}$')
 511            length = int(len(self.e_n_tauint[e_name]))
 512            if self.tau_exp[e_name] > 0:
 513                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 514                x_help = np.arange(2 * self.tau_exp[e_name])
 515                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 516                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 517                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 518                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 519                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 520                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 521                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 522            else:
 523                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 524                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 525
 526            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 527            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 528            plt.legend()
 529            plt.xlim(-0.5, xmax)
 530            ylim = plt.ylim()
 531            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 532            plt.draw()
 533            if save:
 534                fig.savefig(save + "_" + str(e))
 535
 536    def plot_rho(self, save=None):
 537        """Plot normalized autocorrelation function time for each ensemble.
 538
 539        Parameters
 540        ----------
 541        save : str
 542            saves the figure to a file named 'save' if.
 543        """
 544        if not hasattr(self, 'e_dvalue'):
 545            raise Exception('Run the gamma method first.')
 546        for e, e_name in enumerate(self.mc_names):
 547            fig = plt.figure()
 548            plt.xlabel('W')
 549            plt.ylabel('rho')
 550            length = int(len(self.e_drho[e_name]))
 551            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 552            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 553            if self.tau_exp[e_name] > 0:
 554                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 555                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 556                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 557                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 558            else:
 559                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 560                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 561            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 562            plt.xlim(-0.5, xmax)
 563            plt.draw()
 564            if save:
 565                fig.savefig(save + "_" + str(e))
 566
 567    def plot_rep_dist(self):
 568        """Plot replica distribution for each ensemble with more than one replicum."""
 569        if not hasattr(self, 'e_dvalue'):
 570            raise Exception('Run the gamma method first.')
 571        for e, e_name in enumerate(self.mc_names):
 572            if len(self.e_content[e_name]) == 1:
 573                print('No replica distribution for a single replicum (', e_name, ')')
 574                continue
 575            r_length = []
 576            sub_r_mean = 0
 577            for r, r_name in enumerate(self.e_content[e_name]):
 578                r_length.append(len(self.deltas[r_name]))
 579                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 580            e_N = np.sum(r_length)
 581            sub_r_mean /= e_N
 582            arr = np.zeros(len(self.e_content[e_name]))
 583            for r, r_name in enumerate(self.e_content[e_name]):
 584                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 585            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 586            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 587            plt.draw()
 588
 589    def plot_history(self, expand=True):
 590        """Plot derived Monte Carlo history for each ensemble
 591
 592        Parameters
 593        ----------
 594        expand : bool
 595            show expanded history for irregular Monte Carlo chains (default: True).
 596        """
 597        for e, e_name in enumerate(self.mc_names):
 598            plt.figure()
 599            r_length = []
 600            tmp = []
 601            tmp_expanded = []
 602            for r, r_name in enumerate(self.e_content[e_name]):
 603                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 604                if expand:
 605                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 606                    r_length.append(len(tmp_expanded[-1]))
 607                else:
 608                    r_length.append(len(tmp[-1]))
 609            e_N = np.sum(r_length)
 610            x = np.arange(e_N)
 611            y_test = np.concatenate(tmp, axis=0)
 612            if expand:
 613                y = np.concatenate(tmp_expanded, axis=0)
 614            else:
 615                y = y_test
 616            plt.errorbar(x, y, fmt='.', markersize=3)
 617            plt.xlim(-0.5, e_N - 0.5)
 618            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 619            plt.draw()
 620
 621    def plot_piechart(self, save=None):
 622        """Plot piechart which shows the fractional contribution of each
 623        ensemble to the error and returns a dictionary containing the fractions.
 624
 625        Parameters
 626        ----------
 627        save : str
 628            saves the figure to a file named 'save' if.
 629        """
 630        if not hasattr(self, 'e_dvalue'):
 631            raise Exception('Run the gamma method first.')
 632        if np.isclose(0.0, self._dvalue, atol=1e-15):
 633            raise Exception('Error is 0.0')
 634        labels = self.e_names
 635        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 636        fig1, ax1 = plt.subplots()
 637        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 638        ax1.axis('equal')
 639        plt.draw()
 640        if save:
 641            fig1.savefig(save)
 642
 643        return dict(zip(self.e_names, sizes))
 644
 645    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 646        """Dump the Obs to a file 'name' of chosen format.
 647
 648        Parameters
 649        ----------
 650        filename : str
 651            name of the file to be saved.
 652        datatype : str
 653            Format of the exported file. Supported formats include
 654            "json.gz" and "pickle"
 655        description : str
 656            Description for output file, only relevant for json.gz format.
 657        path : str
 658            specifies a custom path for the file (default '.')
 659        """
 660        if 'path' in kwargs:
 661            file_name = kwargs.get('path') + '/' + filename
 662        else:
 663            file_name = filename
 664
 665        if datatype == "json.gz":
 666            from .input.json import dump_to_json
 667            dump_to_json([self], file_name, description=description)
 668        elif datatype == "pickle":
 669            with open(file_name + '.p', 'wb') as fb:
 670                pickle.dump(self, fb)
 671        else:
 672            raise Exception("Unknown datatype " + str(datatype))
 673
 674    def export_jackknife(self):
 675        """Export jackknife samples from the Obs
 676
 677        Returns
 678        -------
 679        numpy.ndarray
 680            Returns a numpy array of length N + 1 where N is the number of samples
 681            for the given ensemble and replicum. The zeroth entry of the array contains
 682            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 683            derived from the Obs. The current implementation only works for observables
 684            defined on exactly one ensemble and replicum. The derived jackknife samples
 685            should agree with samples from a full jackknife analysis up to O(1/N).
 686        """
 687
 688        if len(self.names) != 1:
 689            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 690
 691        name = self.names[0]
 692        full_data = self.deltas[name] + self.r_values[name]
 693        n = full_data.size
 694        mean = self.value
 695        tmp_jacks = np.zeros(n + 1)
 696        tmp_jacks[0] = mean
 697        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 698        return tmp_jacks
 699
 700    def __float__(self):
 701        return float(self.value)
 702
 703    def __repr__(self):
 704        return 'Obs[' + str(self) + ']'
 705
 706    def __str__(self):
 707        return _format_uncertainty(self.value, self._dvalue)
 708
 709    def __hash__(self):
 710        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 711        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 712        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 713        hash_tuple += tuple([o.encode() for o in self.names])
 714        m = hashlib.md5()
 715        [m.update(o) for o in hash_tuple]
 716        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 717
 718    # Overload comparisons
 719    def __lt__(self, other):
 720        return self.value < other
 721
 722    def __le__(self, other):
 723        return self.value <= other
 724
 725    def __gt__(self, other):
 726        return self.value > other
 727
 728    def __ge__(self, other):
 729        return self.value >= other
 730
 731    def __eq__(self, other):
 732        return (self - other).is_zero()
 733
 734    def __ne__(self, other):
 735        return not (self - other).is_zero()
 736
 737    # Overload math operations
 738    def __add__(self, y):
 739        if isinstance(y, Obs):
 740            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 741        else:
 742            if isinstance(y, np.ndarray):
 743                return np.array([self + o for o in y])
 744            elif y.__class__.__name__ in ['Corr', 'CObs']:
 745                return NotImplemented
 746            else:
 747                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 748
 749    def __radd__(self, y):
 750        return self + y
 751
 752    def __mul__(self, y):
 753        if isinstance(y, Obs):
 754            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 755        else:
 756            if isinstance(y, np.ndarray):
 757                return np.array([self * o for o in y])
 758            elif isinstance(y, complex):
 759                return CObs(self * y.real, self * y.imag)
 760            elif y.__class__.__name__ in ['Corr', 'CObs']:
 761                return NotImplemented
 762            else:
 763                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 764
 765    def __rmul__(self, y):
 766        return self * y
 767
 768    def __sub__(self, y):
 769        if isinstance(y, Obs):
 770            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 771        else:
 772            if isinstance(y, np.ndarray):
 773                return np.array([self - o for o in y])
 774            elif y.__class__.__name__ in ['Corr', 'CObs']:
 775                return NotImplemented
 776            else:
 777                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 778
 779    def __rsub__(self, y):
 780        return -1 * (self - y)
 781
 782    def __pos__(self):
 783        return self
 784
 785    def __neg__(self):
 786        return -1 * self
 787
 788    def __truediv__(self, y):
 789        if isinstance(y, Obs):
 790            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 791        else:
 792            if isinstance(y, np.ndarray):
 793                return np.array([self / o for o in y])
 794            elif y.__class__.__name__ in ['Corr', 'CObs']:
 795                return NotImplemented
 796            else:
 797                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 798
 799    def __rtruediv__(self, y):
 800        if isinstance(y, Obs):
 801            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 802        else:
 803            if isinstance(y, np.ndarray):
 804                return np.array([o / self for o in y])
 805            elif y.__class__.__name__ in ['Corr', 'CObs']:
 806                return NotImplemented
 807            else:
 808                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 809
 810    def __pow__(self, y):
 811        if isinstance(y, Obs):
 812            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 813        else:
 814            return derived_observable(lambda x: x[0] ** y, [self])
 815
 816    def __rpow__(self, y):
 817        if isinstance(y, Obs):
 818            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 819        else:
 820            return derived_observable(lambda x: y ** x[0], [self])
 821
 822    def __abs__(self):
 823        return derived_observable(lambda x: anp.abs(x[0]), [self])
 824
 825    # Overload numpy functions
 826    def sqrt(self):
 827        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 828
 829    def log(self):
 830        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 831
 832    def exp(self):
 833        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 834
 835    def sin(self):
 836        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 837
 838    def cos(self):
 839        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 840
 841    def tan(self):
 842        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 843
 844    def arcsin(self):
 845        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 846
 847    def arccos(self):
 848        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 849
 850    def arctan(self):
 851        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 852
 853    def sinh(self):
 854        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 855
 856    def cosh(self):
 857        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 858
 859    def tanh(self):
 860        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 861
 862    def arcsinh(self):
 863        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 864
 865    def arccosh(self):
 866        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 867
 868    def arctanh(self):
 869        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 870
 871
 872class CObs:
 873    """Class for a complex valued observable."""
 874    __slots__ = ['_real', '_imag', 'tag']
 875
 876    def __init__(self, real, imag=0.0):
 877        self._real = real
 878        self._imag = imag
 879        self.tag = None
 880
 881    @property
 882    def real(self):
 883        return self._real
 884
 885    @property
 886    def imag(self):
 887        return self._imag
 888
 889    def gamma_method(self, **kwargs):
 890        """Executes the gamma_method for the real and the imaginary part."""
 891        if isinstance(self.real, Obs):
 892            self.real.gamma_method(**kwargs)
 893        if isinstance(self.imag, Obs):
 894            self.imag.gamma_method(**kwargs)
 895
 896    def is_zero(self):
 897        """Checks whether both real and imaginary part are zero within machine precision."""
 898        return self.real == 0.0 and self.imag == 0.0
 899
 900    def conjugate(self):
 901        return CObs(self.real, -self.imag)
 902
 903    def __add__(self, other):
 904        if isinstance(other, np.ndarray):
 905            return other + self
 906        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 907            return CObs(self.real + other.real,
 908                        self.imag + other.imag)
 909        else:
 910            return CObs(self.real + other, self.imag)
 911
 912    def __radd__(self, y):
 913        return self + y
 914
 915    def __sub__(self, other):
 916        if isinstance(other, np.ndarray):
 917            return -1 * (other - self)
 918        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 919            return CObs(self.real - other.real, self.imag - other.imag)
 920        else:
 921            return CObs(self.real - other, self.imag)
 922
 923    def __rsub__(self, other):
 924        return -1 * (self - other)
 925
 926    def __mul__(self, other):
 927        if isinstance(other, np.ndarray):
 928            return other * self
 929        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 930            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 931                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 932                                               [self.real, other.real, self.imag, other.imag],
 933                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 934                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 935                                               [self.real, other.real, self.imag, other.imag],
 936                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 937            elif getattr(other, 'imag', 0) != 0:
 938                return CObs(self.real * other.real - self.imag * other.imag,
 939                            self.imag * other.real + self.real * other.imag)
 940            else:
 941                return CObs(self.real * other.real, self.imag * other.real)
 942        else:
 943            return CObs(self.real * other, self.imag * other)
 944
 945    def __rmul__(self, other):
 946        return self * other
 947
 948    def __truediv__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return 1 / (other / self)
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            r = other.real ** 2 + other.imag ** 2
 953            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 954        else:
 955            return CObs(self.real / other, self.imag / other)
 956
 957    def __rtruediv__(self, other):
 958        r = self.real ** 2 + self.imag ** 2
 959        if hasattr(other, 'real') and hasattr(other, 'imag'):
 960            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 961        else:
 962            return CObs(self.real * other / r, -self.imag * other / r)
 963
 964    def __abs__(self):
 965        return np.sqrt(self.real**2 + self.imag**2)
 966
 967    def __pos__(self):
 968        return self
 969
 970    def __neg__(self):
 971        return -1 * self
 972
 973    def __eq__(self, other):
 974        return self.real == other.real and self.imag == other.imag
 975
 976    def __str__(self):
 977        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 978
 979    def __repr__(self):
 980        return 'CObs[' + str(self) + ']'
 981
 982
 983def _format_uncertainty(value, dvalue):
 984    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 985    if dvalue == 0.0:
 986        return str(value)
 987    fexp = np.floor(np.log10(dvalue))
 988    if fexp < 0.0:
 989        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 990    elif fexp == 0.0:
 991        return '{:.1f}({:1.1f})'.format(value, dvalue)
 992    else:
 993        return '{:.0f}({:2.0f})'.format(value, dvalue)
 994
 995
 996def _expand_deltas(deltas, idx, shape):
 997    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 998       If idx is of type range, the deltas are not changed
 999
1000    Parameters
1001    ----------
1002    deltas : list
1003        List of fluctuations
1004    idx : list
1005        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1006    shape : int
1007        Number of configs in idx.
1008    """
1009    if isinstance(idx, range):
1010        return deltas
1011    else:
1012        ret = np.zeros(idx[-1] - idx[0] + 1)
1013        for i in range(shape):
1014            ret[idx[i] - idx[0]] = deltas[i]
1015        return ret
1016
1017
1018def _merge_idx(idl):
1019    """Returns the union of all lists in idl as sorted list
1020
1021    Parameters
1022    ----------
1023    idl : list
1024        List of lists or ranges.
1025    """
1026
1027    # Use groupby to efficiently check whether all elements of idl are identical
1028    try:
1029        g = groupby(idl)
1030        if next(g, True) and not next(g, False):
1031            return idl[0]
1032    except Exception:
1033        pass
1034
1035    if np.all([type(idx) is range for idx in idl]):
1036        if len(set([idx[0] for idx in idl])) == 1:
1037            idstart = min([idx.start for idx in idl])
1038            idstop = max([idx.stop for idx in idl])
1039            idstep = min([idx.step for idx in idl])
1040            return range(idstart, idstop, idstep)
1041
1042    return sorted(set().union(*idl))
1043
1044
1045def _intersection_idx(idl):
1046    """Returns the intersection of all lists in idl as sorted list
1047
1048    Parameters
1049    ----------
1050    idl : list
1051        List of lists or ranges.
1052    """
1053
1054    def _lcm(*args):
1055        """Returns the lowest common multiple of args.
1056
1057        From python 3.9 onwards the math library contains an lcm function."""
1058        return reduce(lambda a, b: a * b // gcd(a, b), args)
1059
1060    # Use groupby to efficiently check whether all elements of idl are identical
1061    try:
1062        g = groupby(idl)
1063        if next(g, True) and not next(g, False):
1064            return idl[0]
1065    except Exception:
1066        pass
1067
1068    if np.all([type(idx) is range for idx in idl]):
1069        if len(set([idx[0] for idx in idl])) == 1:
1070            idstart = max([idx.start for idx in idl])
1071            idstop = min([idx.stop for idx in idl])
1072            idstep = _lcm(*[idx.step for idx in idl])
1073            return range(idstart, idstop, idstep)
1074
1075    return sorted(set.intersection(*[set(o) for o in idl]))
1076
1077
1078def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1079    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1080       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1081       common divisor of the step sizes is used as new step size.
1082
1083    Parameters
1084    ----------
1085    deltas : list
1086        List of fluctuations
1087    idx : list
1088        List or range of configs on which the deltas are defined.
1089        Has to be a subset of new_idx and has to be sorted in ascending order.
1090    shape : list
1091        Number of configs in idx.
1092    new_idx : list
1093        List of configs that defines the new range, has to be sorted in ascending order.
1094    """
1095
1096    if type(idx) is range and type(new_idx) is range:
1097        if idx == new_idx:
1098            return deltas
1099    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1100    for i in range(shape):
1101        ret[idx[i] - new_idx[0]] = deltas[i]
1102    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1103
1104
1105def derived_observable(func, data, array_mode=False, **kwargs):
1106    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1107
1108    Parameters
1109    ----------
1110    func : object
1111        arbitrary function of the form func(data, **kwargs). For the
1112        automatic differentiation to work, all numpy functions have to have
1113        the autograd wrapper (use 'import autograd.numpy as anp').
1114    data : list
1115        list of Obs, e.g. [obs1, obs2, obs3].
1116    num_grad : bool
1117        if True, numerical derivatives are used instead of autograd
1118        (default False). To control the numerical differentiation the
1119        kwargs of numdifftools.step_generators.MaxStepGenerator
1120        can be used.
1121    man_grad : list
1122        manually supply a list or an array which contains the jacobian
1123        of func. Use cautiously, supplying the wrong derivative will
1124        not be intercepted.
1125
1126    Notes
1127    -----
1128    For simple mathematical operations it can be practical to use anonymous
1129    functions. For the ratio of two observables one can e.g. use
1130
1131    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1132    """
1133
1134    data = np.asarray(data)
1135    raveled_data = data.ravel()
1136
1137    # Workaround for matrix operations containing non Obs data
1138    if not all(isinstance(x, Obs) for x in raveled_data):
1139        for i in range(len(raveled_data)):
1140            if isinstance(raveled_data[i], (int, float)):
1141                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1142
1143    allcov = {}
1144    for o in raveled_data:
1145        for name in o.cov_names:
1146            if name in allcov:
1147                if not np.allclose(allcov[name], o.covobs[name].cov):
1148                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1149            else:
1150                allcov[name] = o.covobs[name].cov
1151
1152    n_obs = len(raveled_data)
1153    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1154    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1155    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1156
1157    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1158
1159    if data.ndim == 1:
1160        values = np.array([o.value for o in data])
1161    else:
1162        values = np.vectorize(lambda x: x.value)(data)
1163
1164    new_values = func(values, **kwargs)
1165
1166    multi = int(isinstance(new_values, np.ndarray))
1167
1168    new_r_values = {}
1169    new_idl_d = {}
1170    for name in new_sample_names:
1171        idl = []
1172        tmp_values = np.zeros(n_obs)
1173        for i, item in enumerate(raveled_data):
1174            tmp_values[i] = item.r_values.get(name, item.value)
1175            tmp_idl = item.idl.get(name)
1176            if tmp_idl is not None:
1177                idl.append(tmp_idl)
1178        if multi > 0:
1179            tmp_values = np.array(tmp_values).reshape(data.shape)
1180        new_r_values[name] = func(tmp_values, **kwargs)
1181        new_idl_d[name] = _merge_idx(idl)
1182
1183    if 'man_grad' in kwargs:
1184        deriv = np.asarray(kwargs.get('man_grad'))
1185        if new_values.shape + data.shape != deriv.shape:
1186            raise Exception('Manual derivative does not have correct shape.')
1187    elif kwargs.get('num_grad') is True:
1188        if multi > 0:
1189            raise Exception('Multi mode currently not supported for numerical derivative')
1190        options = {
1191            'base_step': 0.1,
1192            'step_ratio': 2.5}
1193        for key in options.keys():
1194            kwarg = kwargs.get(key)
1195            if kwarg is not None:
1196                options[key] = kwarg
1197        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1198        if tmp_df.size == 1:
1199            deriv = np.array([tmp_df.real])
1200        else:
1201            deriv = tmp_df.real
1202    else:
1203        deriv = jacobian(func)(values, **kwargs)
1204
1205    final_result = np.zeros(new_values.shape, dtype=object)
1206
1207    if array_mode is True:
1208
1209        class _Zero_grad():
1210            def __init__(self, N):
1211                self.grad = np.zeros((N, 1))
1212
1213        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1214        d_extracted = {}
1215        g_extracted = {}
1216        for name in new_sample_names:
1217            d_extracted[name] = []
1218            ens_length = len(new_idl_d[name])
1219            for i_dat, dat in enumerate(data):
1220                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1221        for name in new_cov_names:
1222            g_extracted[name] = []
1223            zero_grad = _Zero_grad(new_covobs_lengths[name])
1224            for i_dat, dat in enumerate(data):
1225                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1226
1227    for i_val, new_val in np.ndenumerate(new_values):
1228        new_deltas = {}
1229        new_grad = {}
1230        if array_mode is True:
1231            for name in new_sample_names:
1232                ens_length = d_extracted[name][0].shape[-1]
1233                new_deltas[name] = np.zeros(ens_length)
1234                for i_dat, dat in enumerate(d_extracted[name]):
1235                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1236            for name in new_cov_names:
1237                new_grad[name] = 0
1238                for i_dat, dat in enumerate(g_extracted[name]):
1239                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1240        else:
1241            for j_obs, obs in np.ndenumerate(data):
1242                for name in obs.names:
1243                    if name in obs.cov_names:
1244                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1245                    else:
1246                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1247
1248        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1249
1250        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1251            raise Exception('The same name has been used for deltas and covobs!')
1252        new_samples = []
1253        new_means = []
1254        new_idl = []
1255        new_names_obs = []
1256        for name in new_names:
1257            if name not in new_covobs:
1258                new_samples.append(new_deltas[name])
1259                new_idl.append(new_idl_d[name])
1260                new_means.append(new_r_values[name][i_val])
1261                new_names_obs.append(name)
1262        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1263        for name in new_covobs:
1264            final_result[i_val].names.append(name)
1265        final_result[i_val]._covobs = new_covobs
1266        final_result[i_val]._value = new_val
1267        final_result[i_val].reweighted = reweighted
1268
1269    if multi == 0:
1270        final_result = final_result.item()
1271
1272    return final_result
1273
1274
1275def _reduce_deltas(deltas, idx_old, idx_new):
1276    """Extract deltas defined on idx_old on all configs of idx_new.
1277
1278    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1279    are ordered in an ascending order.
1280
1281    Parameters
1282    ----------
1283    deltas : list
1284        List of fluctuations
1285    idx_old : list
1286        List or range of configs on which the deltas are defined
1287    idx_new : list
1288        List of configs for which we want to extract the deltas.
1289        Has to be a subset of idx_old.
1290    """
1291    if not len(deltas) == len(idx_old):
1292        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1293    if type(idx_old) is range and type(idx_new) is range:
1294        if idx_old == idx_new:
1295            return deltas
1296    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1297    try:
1298        g = groupby([idx_old, idx_new])
1299        if next(g, True) and not next(g, False):
1300            return deltas
1301    except Exception:
1302        pass
1303    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1304    if len(indices) < len(idx_new):
1305        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1306    return np.array(deltas)[indices]
1307
1308
1309def reweight(weight, obs, **kwargs):
1310    """Reweight a list of observables.
1311
1312    Parameters
1313    ----------
1314    weight : Obs
1315        Reweighting factor. An Observable that has to be defined on a superset of the
1316        configurations in obs[i].idl for all i.
1317    obs : list
1318        list of Obs, e.g. [obs1, obs2, obs3].
1319    all_configs : bool
1320        if True, the reweighted observables are normalized by the average of
1321        the reweighting factor on all configurations in weight.idl and not
1322        on the configurations in obs[i].idl. Default False.
1323    """
1324    result = []
1325    for i in range(len(obs)):
1326        if len(obs[i].cov_names):
1327            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1328        if not set(obs[i].names).issubset(weight.names):
1329            raise Exception('Error: Ensembles do not fit')
1330        for name in obs[i].names:
1331            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1332                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1333        new_samples = []
1334        w_deltas = {}
1335        for name in sorted(obs[i].names):
1336            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1337            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1338        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1339
1340        if kwargs.get('all_configs'):
1341            new_weight = weight
1342        else:
1343            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1344
1345        result.append(tmp_obs / new_weight)
1346        result[-1].reweighted = True
1347
1348    return result
1349
1350
1351def correlate(obs_a, obs_b):
1352    """Correlate two observables.
1353
1354    Parameters
1355    ----------
1356    obs_a : Obs
1357        First observable
1358    obs_b : Obs
1359        Second observable
1360
1361    Notes
1362    -----
1363    Keep in mind to only correlate primary observables which have not been reweighted
1364    yet. The reweighting has to be applied after correlating the observables.
1365    Currently only works if ensembles are identical (this is not strictly necessary).
1366    """
1367
1368    if sorted(obs_a.names) != sorted(obs_b.names):
1369        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1370    if len(obs_a.cov_names) or len(obs_b.cov_names):
1371        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1372    for name in obs_a.names:
1373        if obs_a.shape[name] != obs_b.shape[name]:
1374            raise Exception('Shapes of ensemble', name, 'do not fit')
1375        if obs_a.idl[name] != obs_b.idl[name]:
1376            raise Exception('idl of ensemble', name, 'do not fit')
1377
1378    if obs_a.reweighted is True:
1379        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1380    if obs_b.reweighted is True:
1381        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1382
1383    new_samples = []
1384    new_idl = []
1385    for name in sorted(obs_a.names):
1386        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1387        new_idl.append(obs_a.idl[name])
1388
1389    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1390    o.reweighted = obs_a.reweighted or obs_b.reweighted
1391    return o
1392
1393
1394def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1395    r'''Calculates the error covariance matrix of a set of observables.
1396
1397    WARNING: This function should be used with care, especially for observables with support on multiple
1398             ensembles with differing autocorrelations. See the notes below for details.
1399
1400    The gamma method has to be applied first to all observables.
1401
1402    Parameters
1403    ----------
1404    obs : list or numpy.ndarray
1405        List or one dimensional array of Obs
1406    visualize : bool
1407        If True plots the corresponding normalized correlation matrix (default False).
1408    correlation : bool
1409        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1410    smooth : None or int
1411        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1412        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1413        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1414        small ones.
1415
1416    Notes
1417    -----
1418    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1419    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1420    in the absence of autocorrelation.
1421    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1422    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1423    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1424    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1425    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1426    '''
1427
1428    length = len(obs)
1429
1430    max_samples = np.max([o.N for o in obs])
1431    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1432        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1433
1434    cov = np.zeros((length, length))
1435    for i in range(length):
1436        for j in range(i, length):
1437            cov[i, j] = _covariance_element(obs[i], obs[j])
1438    cov = cov + cov.T - np.diag(np.diag(cov))
1439
1440    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1441
1442    if isinstance(smooth, int):
1443        corr = _smooth_eigenvalues(corr, smooth)
1444
1445    if visualize:
1446        plt.matshow(corr, vmin=-1, vmax=1)
1447        plt.set_cmap('RdBu')
1448        plt.colorbar()
1449        plt.draw()
1450
1451    if correlation is True:
1452        return corr
1453
1454    errors = [o.dvalue for o in obs]
1455    cov = np.diag(errors) @ corr @ np.diag(errors)
1456
1457    eigenvalues = np.linalg.eigh(cov)[0]
1458    if not np.all(eigenvalues >= 0):
1459        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1460
1461    return cov
1462
1463
1464def _smooth_eigenvalues(corr, E):
1465    """Eigenvalue smoothing as described in hep-lat/9412087
1466
1467    corr : np.ndarray
1468        correlation matrix
1469    E : integer
1470        Number of eigenvalues to be left substantially unchanged
1471    """
1472    if not (2 < E < corr.shape[0] - 1):
1473        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1474    vals, vec = np.linalg.eigh(corr)
1475    lambda_min = np.mean(vals[:-E])
1476    vals[vals < lambda_min] = lambda_min
1477    vals /= np.mean(vals)
1478    return vec @ np.diag(vals) @ vec.T
1479
1480
1481def _covariance_element(obs1, obs2):
1482    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1483
1484    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1485        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1486        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1487        return np.sum(deltas1 * deltas2)
1488
1489    if set(obs1.names).isdisjoint(set(obs2.names)):
1490        return 0.0
1491
1492    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1493        raise Exception('The gamma method has to be applied to both Obs first.')
1494
1495    dvalue = 0.0
1496
1497    for e_name in obs1.mc_names:
1498
1499        if e_name not in obs2.mc_names:
1500            continue
1501
1502        idl_d = {}
1503        for r_name in obs1.e_content[e_name]:
1504            if r_name not in obs2.e_content[e_name]:
1505                continue
1506            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1507
1508        gamma = 0.0
1509
1510        for r_name in obs1.e_content[e_name]:
1511            if r_name not in obs2.e_content[e_name]:
1512                continue
1513            if len(idl_d[r_name]) == 0:
1514                continue
1515            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1516
1517        if gamma == 0.0:
1518            continue
1519
1520        gamma_div = 0.0
1521        for r_name in obs1.e_content[e_name]:
1522            if r_name not in obs2.e_content[e_name]:
1523                continue
1524            if len(idl_d[r_name]) == 0:
1525                continue
1526            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1527        gamma /= gamma_div
1528
1529        dvalue += gamma
1530
1531    for e_name in obs1.cov_names:
1532
1533        if e_name not in obs2.cov_names:
1534            continue
1535
1536        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1537
1538    return dvalue
1539
1540
1541def import_jackknife(jacks, name, idl=None):
1542    """Imports jackknife samples and returns an Obs
1543
1544    Parameters
1545    ----------
1546    jacks : numpy.ndarray
1547        numpy array containing the mean value as zeroth entry and
1548        the N jackknife samples as first to Nth entry.
1549    name : str
1550        name of the ensemble the samples are defined on.
1551    """
1552    length = len(jacks) - 1
1553    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1554    samples = jacks[1:] @ prj
1555    mean = np.mean(samples)
1556    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1557    new_obs._value = jacks[0]
1558    return new_obs
1559
1560
1561def merge_obs(list_of_obs):
1562    """Combine all observables in list_of_obs into one new observable
1563
1564    Parameters
1565    ----------
1566    list_of_obs : list
1567        list of the Obs object to be combined
1568
1569    Notes
1570    -----
1571    It is not possible to combine obs which are based on the same replicum
1572    """
1573    replist = [item for obs in list_of_obs for item in obs.names]
1574    if (len(replist) == len(set(replist))) is False:
1575        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1576    if any([len(o.cov_names) for o in list_of_obs]):
1577        raise Exception('Not possible to merge data that contains covobs!')
1578    new_dict = {}
1579    idl_dict = {}
1580    for o in list_of_obs:
1581        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1582                        for key in set(o.deltas) | set(o.r_values)})
1583        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1584
1585    names = sorted(new_dict.keys())
1586    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1587    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1588    return o
1589
1590
1591def cov_Obs(means, cov, name, grad=None):
1592    """Create an Obs based on mean(s) and a covariance matrix
1593
1594    Parameters
1595    ----------
1596    mean : list of floats or float
1597        N mean value(s) of the new Obs
1598    cov : list or array
1599        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1600    name : str
1601        identifier for the covariance matrix
1602    grad : list or array
1603        Gradient of the Covobs wrt. the means belonging to cov.
1604    """
1605
1606    def covobs_to_obs(co):
1607        """Make an Obs out of a Covobs
1608
1609        Parameters
1610        ----------
1611        co : Covobs
1612            Covobs to be embedded into the Obs
1613        """
1614        o = Obs([], [], means=[])
1615        o._value = co.value
1616        o.names.append(co.name)
1617        o._covobs[co.name] = co
1618        o._dvalue = np.sqrt(co.errsq())
1619        return o
1620
1621    ol = []
1622    if isinstance(means, (float, int)):
1623        means = [means]
1624
1625    for i in range(len(means)):
1626        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1627    if ol[0].covobs[name].N != len(means):
1628        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1629    if len(ol) == 1:
1630        return ol[0]
1631    return ol
class Obs:
 20class Obs:
 21    """Class for a general observable.
 22
 23    Instances of Obs are the basic objects of a pyerrors error analysis.
 24    They are initialized with a list which contains arrays of samples for
 25    different ensembles/replica and another list of same length which contains
 26    the names of the ensembles/replica. Mathematical operations can be
 27    performed on instances. The result is another instance of Obs. The error of
 28    an instance can be computed with the gamma_method. Also contains additional
 29    methods for output and visualization of the error calculation.
 30
 31    Attributes
 32    ----------
 33    S_global : float
 34        Standard value for S (default 2.0)
 35    S_dict : dict
 36        Dictionary for S values. If an entry for a given ensemble
 37        exists this overwrites the standard value for that ensemble.
 38    tau_exp_global : float
 39        Standard value for tau_exp (default 0.0)
 40    tau_exp_dict : dict
 41        Dictionary for tau_exp values. If an entry for a given ensemble exists
 42        this overwrites the standard value for that ensemble.
 43    N_sigma_global : float
 44        Standard value for N_sigma (default 1.0)
 45    N_sigma_dict : dict
 46        Dictionary for N_sigma values. If an entry for a given ensemble exists
 47        this overwrites the standard value for that ensemble.
 48    """
 49    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 50                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 51                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 52                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 53                 'idl', 'tag', '_covobs', '__dict__']
 54
 55    S_global = 2.0
 56    S_dict = {}
 57    tau_exp_global = 0.0
 58    tau_exp_dict = {}
 59    N_sigma_global = 1.0
 60    N_sigma_dict = {}
 61
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise ValueError('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise ValueError('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise ValueError('Names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise ValueError('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise ValueError("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise TypeError('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None
142
143    @property
144    def value(self):
145        return self._value
146
147    @property
148    def dvalue(self):
149        return self._dvalue
150
151    @property
152    def e_names(self):
153        return sorted(set([o.split('|')[0] for o in self.names]))
154
155    @property
156    def cov_names(self):
157        return sorted(set([o for o in self.covobs.keys()]))
158
159    @property
160    def mc_names(self):
161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
162
163    @property
164    def e_content(self):
165        res = {}
166        for e, e_name in enumerate(self.e_names):
167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
168            if e_name in self.names:
169                res[e_name].append(e_name)
170        return res
171
172    @property
173    def covobs(self):
174        return self._covobs
175
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = (self.e_rho[e_name][i + 1:w_max]
294                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
295                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
296                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
297                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
298
299            if self.tau_exp[e_name] > 0:
300                _compute_drho(gapsize)
301                texp = self.tau_exp[e_name]
302                # Critical slowing down analysis
303                if w_max // 2 <= 1:
304                    raise Exception("Need at least 8 samples for tau_exp error analysis")
305                for n in range(gapsize, w_max // 2, gapsize):
306                    _compute_drho(n + gapsize)
307                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
308                        # Bias correction hep-lat/0306017 eq. (49) included
309                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
310                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
311                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
312                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
313                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
314                        self.e_windowsize[e_name] = n
315                        break
316            else:
317                if self.S[e_name] == 0.0:
318                    self.e_tauint[e_name] = 0.5
319                    self.e_dtauint[e_name] = 0.0
320                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
321                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
322                    self.e_windowsize[e_name] = 0
323                else:
324                    # Standard automatic windowing procedure
325                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
326                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
327                    for n in range(1, w_max // gapsize):
328                        if g_w[n - 1] < 0 or n >= w_max // gapsize - 1:
329                            _compute_drho(gapsize * n)
330                            n *= gapsize
331                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
332                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
333                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
334                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
335                            self.e_windowsize[e_name] = n
336                            break
337
338            self._dvalue += self.e_dvalue[e_name] ** 2
339            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
340
341        for e_name in self.cov_names:
342            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
343            self.e_ddvalue[e_name] = 0
344            self._dvalue += self.e_dvalue[e_name]**2
345
346        self._dvalue = np.sqrt(self._dvalue)
347        if self._dvalue == 0.0:
348            self.ddvalue = 0.0
349        else:
350            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
351        return
352
353    gm = gamma_method
354
355    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
356        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
357           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
358
359        Parameters
360        ----------
361        deltas : list
362            List of fluctuations
363        idx : list
364            List or range of configurations on which the deltas are defined.
365        shape : int
366            Number of configurations in idx.
367        w_max : int
368            Upper bound for the summation window.
369        fft : bool
370            determines whether the fft algorithm is used for the computation
371            of the autocorrelation function.
372        """
373        gamma = np.zeros(w_max)
374        deltas = _expand_deltas(deltas, idx, shape)
375        new_shape = len(deltas)
376        if fft:
377            max_gamma = min(new_shape, w_max)
378            # The padding for the fft has to be even
379            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
380            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
381        else:
382            for n in range(w_max):
383                if new_shape - n >= 0:
384                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
385
386        return gamma
387
388    def details(self, ens_content=True):
389        """Output detailed properties of the Obs.
390
391        Parameters
392        ----------
393        ens_content : bool
394            print details about the ensembles and replica if true.
395        """
396        if self.tag is not None:
397            print("Description:", self.tag)
398        if not hasattr(self, 'e_dvalue'):
399            print('Result\t %3.8e' % (self.value))
400        else:
401            if self.value == 0.0:
402                percentage = np.nan
403            else:
404                percentage = np.abs(self._dvalue / self.value) * 100
405            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
406            if len(self.e_names) > 1:
407                print(' Ensemble errors:')
408            e_content = self.e_content
409            for e_name in self.mc_names:
410                if isinstance(self.idl[e_content[e_name][0]], range):
411                    gap = self.idl[e_content[e_name][0]].step
412                else:
413                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
414
415                if len(self.e_names) > 1:
416                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
417                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
418                tau_string += f" in units of {gap} config"
419                if gap > 1:
420                    tau_string += "s"
421                if self.tau_exp[e_name] > 0:
422                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
423                else:
424                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
425                print(tau_string)
426            for e_name in self.cov_names:
427                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
428        if ens_content is True:
429            if len(self.e_names) == 1:
430                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
431            else:
432                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
433            my_string_list = []
434            for key, value in sorted(self.e_content.items()):
435                if key not in self.covobs:
436                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
437                    if len(value) == 1:
438                        my_string += f': {self.shape[value[0]]} configurations'
439                        if isinstance(self.idl[value[0]], range):
440                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
441                        else:
442                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
443                    else:
444                        sublist = []
445                        for v in value:
446                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
447                            my_substring += f': {self.shape[v]} configurations'
448                            if isinstance(self.idl[v], range):
449                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
450                            else:
451                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
452                            sublist.append(my_substring)
453
454                        my_string += '\n' + '\n'.join(sublist)
455                else:
456                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
457                my_string_list.append(my_string)
458            print('\n'.join(my_string_list))
459
460    def reweight(self, weight):
461        """Reweight the obs with given rewighting factors.
462
463        Parameters
464        ----------
465        weight : Obs
466            Reweighting factor. An Observable that has to be defined on a superset of the
467            configurations in obs[i].idl for all i.
468        all_configs : bool
469            if True, the reweighted observables are normalized by the average of
470            the reweighting factor on all configurations in weight.idl and not
471            on the configurations in obs[i].idl. Default False.
472        """
473        return reweight(weight, [self])[0]
474
475    def is_zero_within_error(self, sigma=1):
476        """Checks whether the observable is zero within 'sigma' standard errors.
477
478        Parameters
479        ----------
480        sigma : int
481            Number of standard errors used for the check.
482
483        Works only properly when the gamma method was run.
484        """
485        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
486
487    def is_zero(self, atol=1e-10):
488        """Checks whether the observable is zero within a given tolerance.
489
490        Parameters
491        ----------
492        atol : float
493            Absolute tolerance (for details see numpy documentation).
494        """
495        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
496
497    def plot_tauint(self, save=None):
498        """Plot integrated autocorrelation time for each ensemble.
499
500        Parameters
501        ----------
502        save : str
503            saves the figure to a file named 'save' if.
504        """
505        if not hasattr(self, 'e_dvalue'):
506            raise Exception('Run the gamma method first.')
507
508        for e, e_name in enumerate(self.mc_names):
509            fig = plt.figure()
510            plt.xlabel(r'$W$')
511            plt.ylabel(r'$\tau_\mathrm{int}$')
512            length = int(len(self.e_n_tauint[e_name]))
513            if self.tau_exp[e_name] > 0:
514                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
515                x_help = np.arange(2 * self.tau_exp[e_name])
516                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
517                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
518                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
519                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
520                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
521                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
522                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
523            else:
524                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
525                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
526
527            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
528            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
529            plt.legend()
530            plt.xlim(-0.5, xmax)
531            ylim = plt.ylim()
532            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
533            plt.draw()
534            if save:
535                fig.savefig(save + "_" + str(e))
536
537    def plot_rho(self, save=None):
538        """Plot normalized autocorrelation function time for each ensemble.
539
540        Parameters
541        ----------
542        save : str
543            saves the figure to a file named 'save' if.
544        """
545        if not hasattr(self, 'e_dvalue'):
546            raise Exception('Run the gamma method first.')
547        for e, e_name in enumerate(self.mc_names):
548            fig = plt.figure()
549            plt.xlabel('W')
550            plt.ylabel('rho')
551            length = int(len(self.e_drho[e_name]))
552            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
553            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
554            if self.tau_exp[e_name] > 0:
555                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
556                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
557                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
558                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
559            else:
560                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
561                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
562            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
563            plt.xlim(-0.5, xmax)
564            plt.draw()
565            if save:
566                fig.savefig(save + "_" + str(e))
567
568    def plot_rep_dist(self):
569        """Plot replica distribution for each ensemble with more than one replicum."""
570        if not hasattr(self, 'e_dvalue'):
571            raise Exception('Run the gamma method first.')
572        for e, e_name in enumerate(self.mc_names):
573            if len(self.e_content[e_name]) == 1:
574                print('No replica distribution for a single replicum (', e_name, ')')
575                continue
576            r_length = []
577            sub_r_mean = 0
578            for r, r_name in enumerate(self.e_content[e_name]):
579                r_length.append(len(self.deltas[r_name]))
580                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
581            e_N = np.sum(r_length)
582            sub_r_mean /= e_N
583            arr = np.zeros(len(self.e_content[e_name]))
584            for r, r_name in enumerate(self.e_content[e_name]):
585                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
586            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
587            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
588            plt.draw()
589
590    def plot_history(self, expand=True):
591        """Plot derived Monte Carlo history for each ensemble
592
593        Parameters
594        ----------
595        expand : bool
596            show expanded history for irregular Monte Carlo chains (default: True).
597        """
598        for e, e_name in enumerate(self.mc_names):
599            plt.figure()
600            r_length = []
601            tmp = []
602            tmp_expanded = []
603            for r, r_name in enumerate(self.e_content[e_name]):
604                tmp.append(self.deltas[r_name] + self.r_values[r_name])
605                if expand:
606                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
607                    r_length.append(len(tmp_expanded[-1]))
608                else:
609                    r_length.append(len(tmp[-1]))
610            e_N = np.sum(r_length)
611            x = np.arange(e_N)
612            y_test = np.concatenate(tmp, axis=0)
613            if expand:
614                y = np.concatenate(tmp_expanded, axis=0)
615            else:
616                y = y_test
617            plt.errorbar(x, y, fmt='.', markersize=3)
618            plt.xlim(-0.5, e_N - 0.5)
619            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
620            plt.draw()
621
622    def plot_piechart(self, save=None):
623        """Plot piechart which shows the fractional contribution of each
624        ensemble to the error and returns a dictionary containing the fractions.
625
626        Parameters
627        ----------
628        save : str
629            saves the figure to a file named 'save' if.
630        """
631        if not hasattr(self, 'e_dvalue'):
632            raise Exception('Run the gamma method first.')
633        if np.isclose(0.0, self._dvalue, atol=1e-15):
634            raise Exception('Error is 0.0')
635        labels = self.e_names
636        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
637        fig1, ax1 = plt.subplots()
638        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
639        ax1.axis('equal')
640        plt.draw()
641        if save:
642            fig1.savefig(save)
643
644        return dict(zip(self.e_names, sizes))
645
646    def dump(self, filename, datatype="json.gz", description="", **kwargs):
647        """Dump the Obs to a file 'name' of chosen format.
648
649        Parameters
650        ----------
651        filename : str
652            name of the file to be saved.
653        datatype : str
654            Format of the exported file. Supported formats include
655            "json.gz" and "pickle"
656        description : str
657            Description for output file, only relevant for json.gz format.
658        path : str
659            specifies a custom path for the file (default '.')
660        """
661        if 'path' in kwargs:
662            file_name = kwargs.get('path') + '/' + filename
663        else:
664            file_name = filename
665
666        if datatype == "json.gz":
667            from .input.json import dump_to_json
668            dump_to_json([self], file_name, description=description)
669        elif datatype == "pickle":
670            with open(file_name + '.p', 'wb') as fb:
671                pickle.dump(self, fb)
672        else:
673            raise Exception("Unknown datatype " + str(datatype))
674
675    def export_jackknife(self):
676        """Export jackknife samples from the Obs
677
678        Returns
679        -------
680        numpy.ndarray
681            Returns a numpy array of length N + 1 where N is the number of samples
682            for the given ensemble and replicum. The zeroth entry of the array contains
683            the mean value of the Obs, entries 1 to N contain the N jackknife samples
684            derived from the Obs. The current implementation only works for observables
685            defined on exactly one ensemble and replicum. The derived jackknife samples
686            should agree with samples from a full jackknife analysis up to O(1/N).
687        """
688
689        if len(self.names) != 1:
690            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
691
692        name = self.names[0]
693        full_data = self.deltas[name] + self.r_values[name]
694        n = full_data.size
695        mean = self.value
696        tmp_jacks = np.zeros(n + 1)
697        tmp_jacks[0] = mean
698        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
699        return tmp_jacks
700
701    def __float__(self):
702        return float(self.value)
703
704    def __repr__(self):
705        return 'Obs[' + str(self) + ']'
706
707    def __str__(self):
708        return _format_uncertainty(self.value, self._dvalue)
709
710    def __hash__(self):
711        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
712        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
713        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
714        hash_tuple += tuple([o.encode() for o in self.names])
715        m = hashlib.md5()
716        [m.update(o) for o in hash_tuple]
717        return int(m.hexdigest(), 16) & 0xFFFFFFFF
718
719    # Overload comparisons
720    def __lt__(self, other):
721        return self.value < other
722
723    def __le__(self, other):
724        return self.value <= other
725
726    def __gt__(self, other):
727        return self.value > other
728
729    def __ge__(self, other):
730        return self.value >= other
731
732    def __eq__(self, other):
733        return (self - other).is_zero()
734
735    def __ne__(self, other):
736        return not (self - other).is_zero()
737
738    # Overload math operations
739    def __add__(self, y):
740        if isinstance(y, Obs):
741            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
742        else:
743            if isinstance(y, np.ndarray):
744                return np.array([self + o for o in y])
745            elif y.__class__.__name__ in ['Corr', 'CObs']:
746                return NotImplemented
747            else:
748                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
749
750    def __radd__(self, y):
751        return self + y
752
753    def __mul__(self, y):
754        if isinstance(y, Obs):
755            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
756        else:
757            if isinstance(y, np.ndarray):
758                return np.array([self * o for o in y])
759            elif isinstance(y, complex):
760                return CObs(self * y.real, self * y.imag)
761            elif y.__class__.__name__ in ['Corr', 'CObs']:
762                return NotImplemented
763            else:
764                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
765
766    def __rmul__(self, y):
767        return self * y
768
769    def __sub__(self, y):
770        if isinstance(y, Obs):
771            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
772        else:
773            if isinstance(y, np.ndarray):
774                return np.array([self - o for o in y])
775            elif y.__class__.__name__ in ['Corr', 'CObs']:
776                return NotImplemented
777            else:
778                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
779
780    def __rsub__(self, y):
781        return -1 * (self - y)
782
783    def __pos__(self):
784        return self
785
786    def __neg__(self):
787        return -1 * self
788
789    def __truediv__(self, y):
790        if isinstance(y, Obs):
791            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
792        else:
793            if isinstance(y, np.ndarray):
794                return np.array([self / o for o in y])
795            elif y.__class__.__name__ in ['Corr', 'CObs']:
796                return NotImplemented
797            else:
798                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
799
800    def __rtruediv__(self, y):
801        if isinstance(y, Obs):
802            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
803        else:
804            if isinstance(y, np.ndarray):
805                return np.array([o / self for o in y])
806            elif y.__class__.__name__ in ['Corr', 'CObs']:
807                return NotImplemented
808            else:
809                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
810
811    def __pow__(self, y):
812        if isinstance(y, Obs):
813            return derived_observable(lambda x: x[0] ** x[1], [self, y])
814        else:
815            return derived_observable(lambda x: x[0] ** y, [self])
816
817    def __rpow__(self, y):
818        if isinstance(y, Obs):
819            return derived_observable(lambda x: x[0] ** x[1], [y, self])
820        else:
821            return derived_observable(lambda x: y ** x[0], [self])
822
823    def __abs__(self):
824        return derived_observable(lambda x: anp.abs(x[0]), [self])
825
826    # Overload numpy functions
827    def sqrt(self):
828        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
829
830    def log(self):
831        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
832
833    def exp(self):
834        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
835
836    def sin(self):
837        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
838
839    def cos(self):
840        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
841
842    def tan(self):
843        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
844
845    def arcsin(self):
846        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
847
848    def arccos(self):
849        return derived_observable(lambda x: anp.arccos(x[0]), [self])
850
851    def arctan(self):
852        return derived_observable(lambda x: anp.arctan(x[0]), [self])
853
854    def sinh(self):
855        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
856
857    def cosh(self):
858        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
859
860    def tanh(self):
861        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
862
863    def arcsinh(self):
864        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
865
866    def arccosh(self):
867        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
868
869    def arctanh(self):
870        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise ValueError('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise ValueError('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise ValueError('Names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise ValueError('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise ValueError("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise TypeError('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = (self.e_rho[e_name][i + 1:w_max]
294                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
295                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
296                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
297                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
298
299            if self.tau_exp[e_name] > 0:
300                _compute_drho(gapsize)
301                texp = self.tau_exp[e_name]
302                # Critical slowing down analysis
303                if w_max // 2 <= 1:
304                    raise Exception("Need at least 8 samples for tau_exp error analysis")
305                for n in range(gapsize, w_max // 2, gapsize):
306                    _compute_drho(n + gapsize)
307                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
308                        # Bias correction hep-lat/0306017 eq. (49) included
309                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
310                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
311                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
312                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
313                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
314                        self.e_windowsize[e_name] = n
315                        break
316            else:
317                if self.S[e_name] == 0.0:
318                    self.e_tauint[e_name] = 0.5
319                    self.e_dtauint[e_name] = 0.0
320                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
321                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
322                    self.e_windowsize[e_name] = 0
323                else:
324                    # Standard automatic windowing procedure
325                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
326                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
327                    for n in range(1, w_max // gapsize):
328                        if g_w[n - 1] < 0 or n >= w_max // gapsize - 1:
329                            _compute_drho(gapsize * n)
330                            n *= gapsize
331                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
332                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
333                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
334                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
335                            self.e_windowsize[e_name] = n
336                            break
337
338            self._dvalue += self.e_dvalue[e_name] ** 2
339            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
340
341        for e_name in self.cov_names:
342            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
343            self.e_ddvalue[e_name] = 0
344            self._dvalue += self.e_dvalue[e_name]**2
345
346        self._dvalue = np.sqrt(self._dvalue)
347        if self._dvalue == 0.0:
348            self.ddvalue = 0.0
349        else:
350            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
351        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = (self.e_rho[e_name][i + 1:w_max]
294                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
295                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
296                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
297                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
298
299            if self.tau_exp[e_name] > 0:
300                _compute_drho(gapsize)
301                texp = self.tau_exp[e_name]
302                # Critical slowing down analysis
303                if w_max // 2 <= 1:
304                    raise Exception("Need at least 8 samples for tau_exp error analysis")
305                for n in range(gapsize, w_max // 2, gapsize):
306                    _compute_drho(n + gapsize)
307                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
308                        # Bias correction hep-lat/0306017 eq. (49) included
309                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
310                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
311                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
312                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
313                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
314                        self.e_windowsize[e_name] = n
315                        break
316            else:
317                if self.S[e_name] == 0.0:
318                    self.e_tauint[e_name] = 0.5
319                    self.e_dtauint[e_name] = 0.0
320                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
321                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
322                    self.e_windowsize[e_name] = 0
323                else:
324                    # Standard automatic windowing procedure
325                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
326                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
327                    for n in range(1, w_max // gapsize):
328                        if g_w[n - 1] < 0 or n >= w_max // gapsize - 1:
329                            _compute_drho(gapsize * n)
330                            n *= gapsize
331                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
332                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
333                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
334                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
335                            self.e_windowsize[e_name] = n
336                            break
337
338            self._dvalue += self.e_dvalue[e_name] ** 2
339            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
340
341        for e_name in self.cov_names:
342            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
343            self.e_ddvalue[e_name] = 0
344            self._dvalue += self.e_dvalue[e_name]**2
345
346        self._dvalue = np.sqrt(self._dvalue)
347        if self._dvalue == 0.0:
348            self.ddvalue = 0.0
349        else:
350            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
351        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
388    def details(self, ens_content=True):
389        """Output detailed properties of the Obs.
390
391        Parameters
392        ----------
393        ens_content : bool
394            print details about the ensembles and replica if true.
395        """
396        if self.tag is not None:
397            print("Description:", self.tag)
398        if not hasattr(self, 'e_dvalue'):
399            print('Result\t %3.8e' % (self.value))
400        else:
401            if self.value == 0.0:
402                percentage = np.nan
403            else:
404                percentage = np.abs(self._dvalue / self.value) * 100
405            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
406            if len(self.e_names) > 1:
407                print(' Ensemble errors:')
408            e_content = self.e_content
409            for e_name in self.mc_names:
410                if isinstance(self.idl[e_content[e_name][0]], range):
411                    gap = self.idl[e_content[e_name][0]].step
412                else:
413                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
414
415                if len(self.e_names) > 1:
416                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
417                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
418                tau_string += f" in units of {gap} config"
419                if gap > 1:
420                    tau_string += "s"
421                if self.tau_exp[e_name] > 0:
422                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
423                else:
424                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
425                print(tau_string)
426            for e_name in self.cov_names:
427                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
428        if ens_content is True:
429            if len(self.e_names) == 1:
430                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
431            else:
432                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
433            my_string_list = []
434            for key, value in sorted(self.e_content.items()):
435                if key not in self.covobs:
436                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
437                    if len(value) == 1:
438                        my_string += f': {self.shape[value[0]]} configurations'
439                        if isinstance(self.idl[value[0]], range):
440                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
441                        else:
442                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
443                    else:
444                        sublist = []
445                        for v in value:
446                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
447                            my_substring += f': {self.shape[v]} configurations'
448                            if isinstance(self.idl[v], range):
449                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
450                            else:
451                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
452                            sublist.append(my_substring)
453
454                        my_string += '\n' + '\n'.join(sublist)
455                else:
456                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
457                my_string_list.append(my_string)
458            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
460    def reweight(self, weight):
461        """Reweight the obs with given rewighting factors.
462
463        Parameters
464        ----------
465        weight : Obs
466            Reweighting factor. An Observable that has to be defined on a superset of the
467            configurations in obs[i].idl for all i.
468        all_configs : bool
469            if True, the reweighted observables are normalized by the average of
470            the reweighting factor on all configurations in weight.idl and not
471            on the configurations in obs[i].idl. Default False.
472        """
473        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
475    def is_zero_within_error(self, sigma=1):
476        """Checks whether the observable is zero within 'sigma' standard errors.
477
478        Parameters
479        ----------
480        sigma : int
481            Number of standard errors used for the check.
482
483        Works only properly when the gamma method was run.
484        """
485        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
487    def is_zero(self, atol=1e-10):
488        """Checks whether the observable is zero within a given tolerance.
489
490        Parameters
491        ----------
492        atol : float
493            Absolute tolerance (for details see numpy documentation).
494        """
495        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
497    def plot_tauint(self, save=None):
498        """Plot integrated autocorrelation time for each ensemble.
499
500        Parameters
501        ----------
502        save : str
503            saves the figure to a file named 'save' if.
504        """
505        if not hasattr(self, 'e_dvalue'):
506            raise Exception('Run the gamma method first.')
507
508        for e, e_name in enumerate(self.mc_names):
509            fig = plt.figure()
510            plt.xlabel(r'$W$')
511            plt.ylabel(r'$\tau_\mathrm{int}$')
512            length = int(len(self.e_n_tauint[e_name]))
513            if self.tau_exp[e_name] > 0:
514                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
515                x_help = np.arange(2 * self.tau_exp[e_name])
516                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
517                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
518                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
519                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
520                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
521                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
522                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
523            else:
524                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
525                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
526
527            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
528            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
529            plt.legend()
530            plt.xlim(-0.5, xmax)
531            ylim = plt.ylim()
532            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
533            plt.draw()
534            if save:
535                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
537    def plot_rho(self, save=None):
538        """Plot normalized autocorrelation function time for each ensemble.
539
540        Parameters
541        ----------
542        save : str
543            saves the figure to a file named 'save' if.
544        """
545        if not hasattr(self, 'e_dvalue'):
546            raise Exception('Run the gamma method first.')
547        for e, e_name in enumerate(self.mc_names):
548            fig = plt.figure()
549            plt.xlabel('W')
550            plt.ylabel('rho')
551            length = int(len(self.e_drho[e_name]))
552            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
553            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
554            if self.tau_exp[e_name] > 0:
555                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
556                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
557                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
558                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
559            else:
560                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
561                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
562            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
563            plt.xlim(-0.5, xmax)
564            plt.draw()
565            if save:
566                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
568    def plot_rep_dist(self):
569        """Plot replica distribution for each ensemble with more than one replicum."""
570        if not hasattr(self, 'e_dvalue'):
571            raise Exception('Run the gamma method first.')
572        for e, e_name in enumerate(self.mc_names):
573            if len(self.e_content[e_name]) == 1:
574                print('No replica distribution for a single replicum (', e_name, ')')
575                continue
576            r_length = []
577            sub_r_mean = 0
578            for r, r_name in enumerate(self.e_content[e_name]):
579                r_length.append(len(self.deltas[r_name]))
580                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
581            e_N = np.sum(r_length)
582            sub_r_mean /= e_N
583            arr = np.zeros(len(self.e_content[e_name]))
584            for r, r_name in enumerate(self.e_content[e_name]):
585                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
586            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
587            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
588            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
590    def plot_history(self, expand=True):
591        """Plot derived Monte Carlo history for each ensemble
592
593        Parameters
594        ----------
595        expand : bool
596            show expanded history for irregular Monte Carlo chains (default: True).
597        """
598        for e, e_name in enumerate(self.mc_names):
599            plt.figure()
600            r_length = []
601            tmp = []
602            tmp_expanded = []
603            for r, r_name in enumerate(self.e_content[e_name]):
604                tmp.append(self.deltas[r_name] + self.r_values[r_name])
605                if expand:
606                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
607                    r_length.append(len(tmp_expanded[-1]))
608                else:
609                    r_length.append(len(tmp[-1]))
610            e_N = np.sum(r_length)
611            x = np.arange(e_N)
612            y_test = np.concatenate(tmp, axis=0)
613            if expand:
614                y = np.concatenate(tmp_expanded, axis=0)
615            else:
616                y = y_test
617            plt.errorbar(x, y, fmt='.', markersize=3)
618            plt.xlim(-0.5, e_N - 0.5)
619            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
620            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
622    def plot_piechart(self, save=None):
623        """Plot piechart which shows the fractional contribution of each
624        ensemble to the error and returns a dictionary containing the fractions.
625
626        Parameters
627        ----------
628        save : str
629            saves the figure to a file named 'save' if.
630        """
631        if not hasattr(self, 'e_dvalue'):
632            raise Exception('Run the gamma method first.')
633        if np.isclose(0.0, self._dvalue, atol=1e-15):
634            raise Exception('Error is 0.0')
635        labels = self.e_names
636        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
637        fig1, ax1 = plt.subplots()
638        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
639        ax1.axis('equal')
640        plt.draw()
641        if save:
642            fig1.savefig(save)
643
644        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
646    def dump(self, filename, datatype="json.gz", description="", **kwargs):
647        """Dump the Obs to a file 'name' of chosen format.
648
649        Parameters
650        ----------
651        filename : str
652            name of the file to be saved.
653        datatype : str
654            Format of the exported file. Supported formats include
655            "json.gz" and "pickle"
656        description : str
657            Description for output file, only relevant for json.gz format.
658        path : str
659            specifies a custom path for the file (default '.')
660        """
661        if 'path' in kwargs:
662            file_name = kwargs.get('path') + '/' + filename
663        else:
664            file_name = filename
665
666        if datatype == "json.gz":
667            from .input.json import dump_to_json
668            dump_to_json([self], file_name, description=description)
669        elif datatype == "pickle":
670            with open(file_name + '.p', 'wb') as fb:
671                pickle.dump(self, fb)
672        else:
673            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
675    def export_jackknife(self):
676        """Export jackknife samples from the Obs
677
678        Returns
679        -------
680        numpy.ndarray
681            Returns a numpy array of length N + 1 where N is the number of samples
682            for the given ensemble and replicum. The zeroth entry of the array contains
683            the mean value of the Obs, entries 1 to N contain the N jackknife samples
684            derived from the Obs. The current implementation only works for observables
685            defined on exactly one ensemble and replicum. The derived jackknife samples
686            should agree with samples from a full jackknife analysis up to O(1/N).
687        """
688
689        if len(self.names) != 1:
690            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
691
692        name = self.names[0]
693        full_data = self.deltas[name] + self.r_values[name]
694        n = full_data.size
695        mean = self.value
696        tmp_jacks = np.zeros(n + 1)
697        tmp_jacks[0] = mean
698        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
699        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
827    def sqrt(self):
828        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
830    def log(self):
831        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
833    def exp(self):
834        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
836    def sin(self):
837        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
839    def cos(self):
840        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
842    def tan(self):
843        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
845    def arcsin(self):
846        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
848    def arccos(self):
849        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
851    def arctan(self):
852        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
854    def sinh(self):
855        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
857    def cosh(self):
858        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
860    def tanh(self):
861        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
863    def arcsinh(self):
864        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
866    def arccosh(self):
867        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
869    def arctanh(self):
870        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
873class CObs:
874    """Class for a complex valued observable."""
875    __slots__ = ['_real', '_imag', 'tag']
876
877    def __init__(self, real, imag=0.0):
878        self._real = real
879        self._imag = imag
880        self.tag = None
881
882    @property
883    def real(self):
884        return self._real
885
886    @property
887    def imag(self):
888        return self._imag
889
890    def gamma_method(self, **kwargs):
891        """Executes the gamma_method for the real and the imaginary part."""
892        if isinstance(self.real, Obs):
893            self.real.gamma_method(**kwargs)
894        if isinstance(self.imag, Obs):
895            self.imag.gamma_method(**kwargs)
896
897    def is_zero(self):
898        """Checks whether both real and imaginary part are zero within machine precision."""
899        return self.real == 0.0 and self.imag == 0.0
900
901    def conjugate(self):
902        return CObs(self.real, -self.imag)
903
904    def __add__(self, other):
905        if isinstance(other, np.ndarray):
906            return other + self
907        elif hasattr(other, 'real') and hasattr(other, 'imag'):
908            return CObs(self.real + other.real,
909                        self.imag + other.imag)
910        else:
911            return CObs(self.real + other, self.imag)
912
913    def __radd__(self, y):
914        return self + y
915
916    def __sub__(self, other):
917        if isinstance(other, np.ndarray):
918            return -1 * (other - self)
919        elif hasattr(other, 'real') and hasattr(other, 'imag'):
920            return CObs(self.real - other.real, self.imag - other.imag)
921        else:
922            return CObs(self.real - other, self.imag)
923
924    def __rsub__(self, other):
925        return -1 * (self - other)
926
927    def __mul__(self, other):
928        if isinstance(other, np.ndarray):
929            return other * self
930        elif hasattr(other, 'real') and hasattr(other, 'imag'):
931            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
932                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
933                                               [self.real, other.real, self.imag, other.imag],
934                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
935                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
936                                               [self.real, other.real, self.imag, other.imag],
937                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
938            elif getattr(other, 'imag', 0) != 0:
939                return CObs(self.real * other.real - self.imag * other.imag,
940                            self.imag * other.real + self.real * other.imag)
941            else:
942                return CObs(self.real * other.real, self.imag * other.real)
943        else:
944            return CObs(self.real * other, self.imag * other)
945
946    def __rmul__(self, other):
947        return self * other
948
949    def __truediv__(self, other):
950        if isinstance(other, np.ndarray):
951            return 1 / (other / self)
952        elif hasattr(other, 'real') and hasattr(other, 'imag'):
953            r = other.real ** 2 + other.imag ** 2
954            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
955        else:
956            return CObs(self.real / other, self.imag / other)
957
958    def __rtruediv__(self, other):
959        r = self.real ** 2 + self.imag ** 2
960        if hasattr(other, 'real') and hasattr(other, 'imag'):
961            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
962        else:
963            return CObs(self.real * other / r, -self.imag * other / r)
964
965    def __abs__(self):
966        return np.sqrt(self.real**2 + self.imag**2)
967
968    def __pos__(self):
969        return self
970
971    def __neg__(self):
972        return -1 * self
973
974    def __eq__(self, other):
975        return self.real == other.real and self.imag == other.imag
976
977    def __str__(self):
978        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
979
980    def __repr__(self):
981        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
877    def __init__(self, real, imag=0.0):
878        self._real = real
879        self._imag = imag
880        self.tag = None
def gamma_method(self, **kwargs):
890    def gamma_method(self, **kwargs):
891        """Executes the gamma_method for the real and the imaginary part."""
892        if isinstance(self.real, Obs):
893            self.real.gamma_method(**kwargs)
894        if isinstance(self.imag, Obs):
895            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
897    def is_zero(self):
898        """Checks whether both real and imaginary part are zero within machine precision."""
899        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
901    def conjugate(self):
902        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
1106def derived_observable(func, data, array_mode=False, **kwargs):
1107    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1108
1109    Parameters
1110    ----------
1111    func : object
1112        arbitrary function of the form func(data, **kwargs). For the
1113        automatic differentiation to work, all numpy functions have to have
1114        the autograd wrapper (use 'import autograd.numpy as anp').
1115    data : list
1116        list of Obs, e.g. [obs1, obs2, obs3].
1117    num_grad : bool
1118        if True, numerical derivatives are used instead of autograd
1119        (default False). To control the numerical differentiation the
1120        kwargs of numdifftools.step_generators.MaxStepGenerator
1121        can be used.
1122    man_grad : list
1123        manually supply a list or an array which contains the jacobian
1124        of func. Use cautiously, supplying the wrong derivative will
1125        not be intercepted.
1126
1127    Notes
1128    -----
1129    For simple mathematical operations it can be practical to use anonymous
1130    functions. For the ratio of two observables one can e.g. use
1131
1132    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1133    """
1134
1135    data = np.asarray(data)
1136    raveled_data = data.ravel()
1137
1138    # Workaround for matrix operations containing non Obs data
1139    if not all(isinstance(x, Obs) for x in raveled_data):
1140        for i in range(len(raveled_data)):
1141            if isinstance(raveled_data[i], (int, float)):
1142                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1143
1144    allcov = {}
1145    for o in raveled_data:
1146        for name in o.cov_names:
1147            if name in allcov:
1148                if not np.allclose(allcov[name], o.covobs[name].cov):
1149                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1150            else:
1151                allcov[name] = o.covobs[name].cov
1152
1153    n_obs = len(raveled_data)
1154    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1155    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1156    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1157
1158    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1159
1160    if data.ndim == 1:
1161        values = np.array([o.value for o in data])
1162    else:
1163        values = np.vectorize(lambda x: x.value)(data)
1164
1165    new_values = func(values, **kwargs)
1166
1167    multi = int(isinstance(new_values, np.ndarray))
1168
1169    new_r_values = {}
1170    new_idl_d = {}
1171    for name in new_sample_names:
1172        idl = []
1173        tmp_values = np.zeros(n_obs)
1174        for i, item in enumerate(raveled_data):
1175            tmp_values[i] = item.r_values.get(name, item.value)
1176            tmp_idl = item.idl.get(name)
1177            if tmp_idl is not None:
1178                idl.append(tmp_idl)
1179        if multi > 0:
1180            tmp_values = np.array(tmp_values).reshape(data.shape)
1181        new_r_values[name] = func(tmp_values, **kwargs)
1182        new_idl_d[name] = _merge_idx(idl)
1183
1184    if 'man_grad' in kwargs:
1185        deriv = np.asarray(kwargs.get('man_grad'))
1186        if new_values.shape + data.shape != deriv.shape:
1187            raise Exception('Manual derivative does not have correct shape.')
1188    elif kwargs.get('num_grad') is True:
1189        if multi > 0:
1190            raise Exception('Multi mode currently not supported for numerical derivative')
1191        options = {
1192            'base_step': 0.1,
1193            'step_ratio': 2.5}
1194        for key in options.keys():
1195            kwarg = kwargs.get(key)
1196            if kwarg is not None:
1197                options[key] = kwarg
1198        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1199        if tmp_df.size == 1:
1200            deriv = np.array([tmp_df.real])
1201        else:
1202            deriv = tmp_df.real
1203    else:
1204        deriv = jacobian(func)(values, **kwargs)
1205
1206    final_result = np.zeros(new_values.shape, dtype=object)
1207
1208    if array_mode is True:
1209
1210        class _Zero_grad():
1211            def __init__(self, N):
1212                self.grad = np.zeros((N, 1))
1213
1214        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1215        d_extracted = {}
1216        g_extracted = {}
1217        for name in new_sample_names:
1218            d_extracted[name] = []
1219            ens_length = len(new_idl_d[name])
1220            for i_dat, dat in enumerate(data):
1221                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1222        for name in new_cov_names:
1223            g_extracted[name] = []
1224            zero_grad = _Zero_grad(new_covobs_lengths[name])
1225            for i_dat, dat in enumerate(data):
1226                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1227
1228    for i_val, new_val in np.ndenumerate(new_values):
1229        new_deltas = {}
1230        new_grad = {}
1231        if array_mode is True:
1232            for name in new_sample_names:
1233                ens_length = d_extracted[name][0].shape[-1]
1234                new_deltas[name] = np.zeros(ens_length)
1235                for i_dat, dat in enumerate(d_extracted[name]):
1236                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1237            for name in new_cov_names:
1238                new_grad[name] = 0
1239                for i_dat, dat in enumerate(g_extracted[name]):
1240                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1241        else:
1242            for j_obs, obs in np.ndenumerate(data):
1243                for name in obs.names:
1244                    if name in obs.cov_names:
1245                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1246                    else:
1247                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1248
1249        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1250
1251        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1252            raise Exception('The same name has been used for deltas and covobs!')
1253        new_samples = []
1254        new_means = []
1255        new_idl = []
1256        new_names_obs = []
1257        for name in new_names:
1258            if name not in new_covobs:
1259                new_samples.append(new_deltas[name])
1260                new_idl.append(new_idl_d[name])
1261                new_means.append(new_r_values[name][i_val])
1262                new_names_obs.append(name)
1263        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1264        for name in new_covobs:
1265            final_result[i_val].names.append(name)
1266        final_result[i_val]._covobs = new_covobs
1267        final_result[i_val]._value = new_val
1268        final_result[i_val].reweighted = reweighted
1269
1270    if multi == 0:
1271        final_result = final_result.item()
1272
1273    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1310def reweight(weight, obs, **kwargs):
1311    """Reweight a list of observables.
1312
1313    Parameters
1314    ----------
1315    weight : Obs
1316        Reweighting factor. An Observable that has to be defined on a superset of the
1317        configurations in obs[i].idl for all i.
1318    obs : list
1319        list of Obs, e.g. [obs1, obs2, obs3].
1320    all_configs : bool
1321        if True, the reweighted observables are normalized by the average of
1322        the reweighting factor on all configurations in weight.idl and not
1323        on the configurations in obs[i].idl. Default False.
1324    """
1325    result = []
1326    for i in range(len(obs)):
1327        if len(obs[i].cov_names):
1328            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1329        if not set(obs[i].names).issubset(weight.names):
1330            raise Exception('Error: Ensembles do not fit')
1331        for name in obs[i].names:
1332            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1333                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1334        new_samples = []
1335        w_deltas = {}
1336        for name in sorted(obs[i].names):
1337            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1338            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1339        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1340
1341        if kwargs.get('all_configs'):
1342            new_weight = weight
1343        else:
1344            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1345
1346        result.append(tmp_obs / new_weight)
1347        result[-1].reweighted = True
1348
1349    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1352def correlate(obs_a, obs_b):
1353    """Correlate two observables.
1354
1355    Parameters
1356    ----------
1357    obs_a : Obs
1358        First observable
1359    obs_b : Obs
1360        Second observable
1361
1362    Notes
1363    -----
1364    Keep in mind to only correlate primary observables which have not been reweighted
1365    yet. The reweighting has to be applied after correlating the observables.
1366    Currently only works if ensembles are identical (this is not strictly necessary).
1367    """
1368
1369    if sorted(obs_a.names) != sorted(obs_b.names):
1370        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1371    if len(obs_a.cov_names) or len(obs_b.cov_names):
1372        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1373    for name in obs_a.names:
1374        if obs_a.shape[name] != obs_b.shape[name]:
1375            raise Exception('Shapes of ensemble', name, 'do not fit')
1376        if obs_a.idl[name] != obs_b.idl[name]:
1377            raise Exception('idl of ensemble', name, 'do not fit')
1378
1379    if obs_a.reweighted is True:
1380        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1381    if obs_b.reweighted is True:
1382        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1383
1384    new_samples = []
1385    new_idl = []
1386    for name in sorted(obs_a.names):
1387        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1388        new_idl.append(obs_a.idl[name])
1389
1390    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1391    o.reweighted = obs_a.reweighted or obs_b.reweighted
1392    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1395def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1396    r'''Calculates the error covariance matrix of a set of observables.
1397
1398    WARNING: This function should be used with care, especially for observables with support on multiple
1399             ensembles with differing autocorrelations. See the notes below for details.
1400
1401    The gamma method has to be applied first to all observables.
1402
1403    Parameters
1404    ----------
1405    obs : list or numpy.ndarray
1406        List or one dimensional array of Obs
1407    visualize : bool
1408        If True plots the corresponding normalized correlation matrix (default False).
1409    correlation : bool
1410        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1411    smooth : None or int
1412        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1413        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1414        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1415        small ones.
1416
1417    Notes
1418    -----
1419    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1420    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1421    in the absence of autocorrelation.
1422    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1423    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1424    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1425    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1426    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1427    '''
1428
1429    length = len(obs)
1430
1431    max_samples = np.max([o.N for o in obs])
1432    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1433        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1434
1435    cov = np.zeros((length, length))
1436    for i in range(length):
1437        for j in range(i, length):
1438            cov[i, j] = _covariance_element(obs[i], obs[j])
1439    cov = cov + cov.T - np.diag(np.diag(cov))
1440
1441    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1442
1443    if isinstance(smooth, int):
1444        corr = _smooth_eigenvalues(corr, smooth)
1445
1446    if visualize:
1447        plt.matshow(corr, vmin=-1, vmax=1)
1448        plt.set_cmap('RdBu')
1449        plt.colorbar()
1450        plt.draw()
1451
1452    if correlation is True:
1453        return corr
1454
1455    errors = [o.dvalue for o in obs]
1456    cov = np.diag(errors) @ corr @ np.diag(errors)
1457
1458    eigenvalues = np.linalg.eigh(cov)[0]
1459    if not np.all(eigenvalues >= 0):
1460        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1461
1462    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1542def import_jackknife(jacks, name, idl=None):
1543    """Imports jackknife samples and returns an Obs
1544
1545    Parameters
1546    ----------
1547    jacks : numpy.ndarray
1548        numpy array containing the mean value as zeroth entry and
1549        the N jackknife samples as first to Nth entry.
1550    name : str
1551        name of the ensemble the samples are defined on.
1552    """
1553    length = len(jacks) - 1
1554    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1555    samples = jacks[1:] @ prj
1556    mean = np.mean(samples)
1557    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1558    new_obs._value = jacks[0]
1559    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
1562def merge_obs(list_of_obs):
1563    """Combine all observables in list_of_obs into one new observable
1564
1565    Parameters
1566    ----------
1567    list_of_obs : list
1568        list of the Obs object to be combined
1569
1570    Notes
1571    -----
1572    It is not possible to combine obs which are based on the same replicum
1573    """
1574    replist = [item for obs in list_of_obs for item in obs.names]
1575    if (len(replist) == len(set(replist))) is False:
1576        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1577    if any([len(o.cov_names) for o in list_of_obs]):
1578        raise Exception('Not possible to merge data that contains covobs!')
1579    new_dict = {}
1580    idl_dict = {}
1581    for o in list_of_obs:
1582        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1583                        for key in set(o.deltas) | set(o.r_values)})
1584        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1585
1586    names = sorted(new_dict.keys())
1587    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1588    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1589    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1592def cov_Obs(means, cov, name, grad=None):
1593    """Create an Obs based on mean(s) and a covariance matrix
1594
1595    Parameters
1596    ----------
1597    mean : list of floats or float
1598        N mean value(s) of the new Obs
1599    cov : list or array
1600        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1601    name : str
1602        identifier for the covariance matrix
1603    grad : list or array
1604        Gradient of the Covobs wrt. the means belonging to cov.
1605    """
1606
1607    def covobs_to_obs(co):
1608        """Make an Obs out of a Covobs
1609
1610        Parameters
1611        ----------
1612        co : Covobs
1613            Covobs to be embedded into the Obs
1614        """
1615        o = Obs([], [], means=[])
1616        o._value = co.value
1617        o.names.append(co.name)
1618        o._covobs[co.name] = co
1619        o._dvalue = np.sqrt(co.errsq())
1620        return o
1621
1622    ol = []
1623    if isinstance(means, (float, int)):
1624        means = [means]
1625
1626    for i in range(len(means)):
1627        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1628    if ol[0].covobs[name].N != len(means):
1629        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1630    if len(ol) == 1:
1631        return ol[0]
1632    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.