pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 108                    if len(dc) == 1:
 109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 110                    else:
 111                        self.idl[name] = list(idx)
 112                else:
 113                    raise TypeError('incompatible type for idl[%s].' % (name))
 114        else:
 115            for name, sample in sorted(zip(names, samples)):
 116                self.idl[name] = range(1, len(sample) + 1)
 117
 118        if kwargs.get("means") is not None:
 119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 120                self.shape[name] = len(self.idl[name])
 121                self.N += self.shape[name]
 122                self.r_values[name] = mean
 123                self.deltas[name] = sample
 124        else:
 125            for name, sample in sorted(zip(names, samples)):
 126                self.shape[name] = len(self.idl[name])
 127                self.N += self.shape[name]
 128                if len(sample) != self.shape[name]:
 129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 130                self.r_values[name] = np.mean(sample)
 131                self.deltas[name] = sample - self.r_values[name]
 132                self._value += self.shape[name] * self.r_values[name]
 133            self._value /= self.N
 134
 135        self._dvalue = 0.0
 136        self.ddvalue = 0.0
 137        self.reweighted = False
 138
 139        self.tag = None
 140
 141    @property
 142    def value(self):
 143        return self._value
 144
 145    @property
 146    def dvalue(self):
 147        return self._dvalue
 148
 149    @property
 150    def e_names(self):
 151        return sorted(set([o.split('|')[0] for o in self.names]))
 152
 153    @property
 154    def cov_names(self):
 155        return sorted(set([o for o in self.covobs.keys()]))
 156
 157    @property
 158    def mc_names(self):
 159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 160
 161    @property
 162    def e_content(self):
 163        res = {}
 164        for e, e_name in enumerate(self.e_names):
 165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 166            if e_name in self.names:
 167                res[e_name].append(e_name)
 168        return res
 169
 170    @property
 171    def covobs(self):
 172        return self._covobs
 173
 174    def gamma_method(self, **kwargs):
 175        """Estimate the error and related properties of the Obs.
 176
 177        Parameters
 178        ----------
 179        S : float
 180            specifies a custom value for the parameter S (default 2.0).
 181            If set to 0 it is assumed that the data exhibits no
 182            autocorrelation. In this case the error estimates coincides
 183            with the sample standard error.
 184        tau_exp : float
 185            positive value triggers the critical slowing down analysis
 186            (default 0.0).
 187        N_sigma : float
 188            number of standard deviations from zero until the tail is
 189            attached to the autocorrelation function (default 1).
 190        fft : bool
 191            determines whether the fft algorithm is used for the computation
 192            of the autocorrelation function (default True)
 193        """
 194
 195        e_content = self.e_content
 196        self.e_dvalue = {}
 197        self.e_ddvalue = {}
 198        self.e_tauint = {}
 199        self.e_dtauint = {}
 200        self.e_windowsize = {}
 201        self.e_n_tauint = {}
 202        self.e_n_dtauint = {}
 203        e_gamma = {}
 204        self.e_rho = {}
 205        self.e_drho = {}
 206        self._dvalue = 0
 207        self.ddvalue = 0
 208
 209        self.S = {}
 210        self.tau_exp = {}
 211        self.N_sigma = {}
 212
 213        if kwargs.get('fft') is False:
 214            fft = False
 215        else:
 216            fft = True
 217
 218        def _parse_kwarg(kwarg_name):
 219            if kwarg_name in kwargs:
 220                tmp = kwargs.get(kwarg_name)
 221                if isinstance(tmp, (int, float)):
 222                    if tmp < 0:
 223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 224                    for e, e_name in enumerate(self.e_names):
 225                        getattr(self, kwarg_name)[e_name] = tmp
 226                else:
 227                    raise TypeError(kwarg_name + ' is not in proper format.')
 228            else:
 229                for e, e_name in enumerate(self.e_names):
 230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 232                    else:
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 234
 235        _parse_kwarg('S')
 236        _parse_kwarg('tau_exp')
 237        _parse_kwarg('N_sigma')
 238
 239        for e, e_name in enumerate(self.mc_names):
 240            gapsize = _determine_gap(self, e_content, e_name)
 241
 242            r_length = []
 243            for r_name in e_content[e_name]:
 244                if isinstance(self.idl[r_name], range):
 245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 246                else:
 247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 248
 249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 250            w_max = max(r_length) // 2
 251            e_gamma[e_name] = np.zeros(w_max)
 252            self.e_rho[e_name] = np.zeros(w_max)
 253            self.e_drho[e_name] = np.zeros(w_max)
 254
 255            for r_name in e_content[e_name]:
 256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 257
 258            gamma_div = np.zeros(w_max)
 259            for r_name in e_content[e_name]:
 260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 261            gamma_div[gamma_div < 1] = 1.0
 262            e_gamma[e_name] /= gamma_div[:w_max]
 263
 264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 265                self.e_tauint[e_name] = 0.5
 266                self.e_dtauint[e_name] = 0.0
 267                self.e_dvalue[e_name] = 0.0
 268                self.e_ddvalue[e_name] = 0.0
 269                self.e_windowsize[e_name] = 0
 270                continue
 271
 272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 274            # Make sure no entry of tauint is smaller than 0.5
 275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 276            # hep-lat/0306017 eq. (42)
 277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 278            self.e_n_dtauint[e_name][0] = 0.0
 279
 280            def _compute_drho(i):
 281                tmp = (self.e_rho[e_name][i + 1:w_max]
 282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 286
 287            if self.tau_exp[e_name] > 0:
 288                _compute_drho(1)
 289                texp = self.tau_exp[e_name]
 290                # Critical slowing down analysis
 291                if w_max // 2 <= 1:
 292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 293                for n in range(1, w_max // 2):
 294                    _compute_drho(n + 1)
 295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 296                        # Bias correction hep-lat/0306017 eq. (49) included
 297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 302                        self.e_windowsize[e_name] = n
 303                        break
 304            else:
 305                if self.S[e_name] == 0.0:
 306                    self.e_tauint[e_name] = 0.5
 307                    self.e_dtauint[e_name] = 0.0
 308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 310                    self.e_windowsize[e_name] = 0
 311                else:
 312                    # Standard automatic windowing procedure
 313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 315                    for n in range(1, w_max):
 316                        if g_w[n - 1] < 0 or n >= w_max - 1:
 317                            _compute_drho(n)
 318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 322                            self.e_windowsize[e_name] = n
 323                            break
 324
 325            self._dvalue += self.e_dvalue[e_name] ** 2
 326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 327
 328        for e_name in self.cov_names:
 329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 330            self.e_ddvalue[e_name] = 0
 331            self._dvalue += self.e_dvalue[e_name]**2
 332
 333        self._dvalue = np.sqrt(self._dvalue)
 334        if self._dvalue == 0.0:
 335            self.ddvalue = 0.0
 336        else:
 337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 338        return
 339
 340    gm = gamma_method
 341
 342    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 343        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 344           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 345
 346        Parameters
 347        ----------
 348        deltas : list
 349            List of fluctuations
 350        idx : list
 351            List or range of configurations on which the deltas are defined.
 352        shape : int
 353            Number of configurations in idx.
 354        w_max : int
 355            Upper bound for the summation window.
 356        fft : bool
 357            determines whether the fft algorithm is used for the computation
 358            of the autocorrelation function.
 359        gapsize : int
 360            The target distance between two configurations. If longer distances
 361            are found in idx, the data is expanded.
 362        """
 363        gamma = np.zeros(w_max)
 364        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 365        new_shape = len(deltas)
 366        if fft:
 367            max_gamma = min(new_shape, w_max)
 368            # The padding for the fft has to be even
 369            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 370            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 371        else:
 372            for n in range(w_max):
 373                if new_shape - n >= 0:
 374                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 375
 376        return gamma
 377
 378    def details(self, ens_content=True):
 379        """Output detailed properties of the Obs.
 380
 381        Parameters
 382        ----------
 383        ens_content : bool
 384            print details about the ensembles and replica if true.
 385        """
 386        if self.tag is not None:
 387            print("Description:", self.tag)
 388        if not hasattr(self, 'e_dvalue'):
 389            print('Result\t %3.8e' % (self.value))
 390        else:
 391            if self.value == 0.0:
 392                percentage = np.nan
 393            else:
 394                percentage = np.abs(self._dvalue / self.value) * 100
 395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 396            if len(self.e_names) > 1:
 397                print(' Ensemble errors:')
 398            e_content = self.e_content
 399            for e_name in self.mc_names:
 400                gap = _determine_gap(self, e_content, e_name)
 401
 402                if len(self.e_names) > 1:
 403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 405                tau_string += f" in units of {gap} config"
 406                if gap > 1:
 407                    tau_string += "s"
 408                if self.tau_exp[e_name] > 0:
 409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 410                else:
 411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 412                print(tau_string)
 413            for e_name in self.cov_names:
 414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 415        if ens_content is True:
 416            if len(self.e_names) == 1:
 417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 418            else:
 419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 420            my_string_list = []
 421            for key, value in sorted(self.e_content.items()):
 422                if key not in self.covobs:
 423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 424                    if len(value) == 1:
 425                        my_string += f': {self.shape[value[0]]} configurations'
 426                        if isinstance(self.idl[value[0]], range):
 427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 428                        else:
 429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 430                    else:
 431                        sublist = []
 432                        for v in value:
 433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 434                            my_substring += f': {self.shape[v]} configurations'
 435                            if isinstance(self.idl[v], range):
 436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 437                            else:
 438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 439                            sublist.append(my_substring)
 440
 441                        my_string += '\n' + '\n'.join(sublist)
 442                else:
 443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 444                my_string_list.append(my_string)
 445            print('\n'.join(my_string_list))
 446
 447    def reweight(self, weight):
 448        """Reweight the obs with given rewighting factors.
 449
 450        Parameters
 451        ----------
 452        weight : Obs
 453            Reweighting factor. An Observable that has to be defined on a superset of the
 454            configurations in obs[i].idl for all i.
 455        all_configs : bool
 456            if True, the reweighted observables are normalized by the average of
 457            the reweighting factor on all configurations in weight.idl and not
 458            on the configurations in obs[i].idl. Default False.
 459        """
 460        return reweight(weight, [self])[0]
 461
 462    def is_zero_within_error(self, sigma=1):
 463        """Checks whether the observable is zero within 'sigma' standard errors.
 464
 465        Parameters
 466        ----------
 467        sigma : int
 468            Number of standard errors used for the check.
 469
 470        Works only properly when the gamma method was run.
 471        """
 472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 473
 474    def is_zero(self, atol=1e-10):
 475        """Checks whether the observable is zero within a given tolerance.
 476
 477        Parameters
 478        ----------
 479        atol : float
 480            Absolute tolerance (for details see numpy documentation).
 481        """
 482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 483
 484    def plot_tauint(self, save=None):
 485        """Plot integrated autocorrelation time for each ensemble.
 486
 487        Parameters
 488        ----------
 489        save : str
 490            saves the figure to a file named 'save' if.
 491        """
 492        if not hasattr(self, 'e_dvalue'):
 493            raise Exception('Run the gamma method first.')
 494
 495        for e, e_name in enumerate(self.mc_names):
 496            fig = plt.figure()
 497            plt.xlabel(r'$W$')
 498            plt.ylabel(r'$\tau_\mathrm{int}$')
 499            length = int(len(self.e_n_tauint[e_name]))
 500            if self.tau_exp[e_name] > 0:
 501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 502                x_help = np.arange(2 * self.tau_exp[e_name])
 503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 510            else:
 511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 513
 514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 516            plt.legend()
 517            plt.xlim(-0.5, xmax)
 518            ylim = plt.ylim()
 519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 520            plt.draw()
 521            if save:
 522                fig.savefig(save + "_" + str(e))
 523
 524    def plot_rho(self, save=None):
 525        """Plot normalized autocorrelation function time for each ensemble.
 526
 527        Parameters
 528        ----------
 529        save : str
 530            saves the figure to a file named 'save' if.
 531        """
 532        if not hasattr(self, 'e_dvalue'):
 533            raise Exception('Run the gamma method first.')
 534        for e, e_name in enumerate(self.mc_names):
 535            fig = plt.figure()
 536            plt.xlabel('W')
 537            plt.ylabel('rho')
 538            length = int(len(self.e_drho[e_name]))
 539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 541            if self.tau_exp[e_name] > 0:
 542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 546            else:
 547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 550            plt.xlim(-0.5, xmax)
 551            plt.draw()
 552            if save:
 553                fig.savefig(save + "_" + str(e))
 554
 555    def plot_rep_dist(self):
 556        """Plot replica distribution for each ensemble with more than one replicum."""
 557        if not hasattr(self, 'e_dvalue'):
 558            raise Exception('Run the gamma method first.')
 559        for e, e_name in enumerate(self.mc_names):
 560            if len(self.e_content[e_name]) == 1:
 561                print('No replica distribution for a single replicum (', e_name, ')')
 562                continue
 563            r_length = []
 564            sub_r_mean = 0
 565            for r, r_name in enumerate(self.e_content[e_name]):
 566                r_length.append(len(self.deltas[r_name]))
 567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 568            e_N = np.sum(r_length)
 569            sub_r_mean /= e_N
 570            arr = np.zeros(len(self.e_content[e_name]))
 571            for r, r_name in enumerate(self.e_content[e_name]):
 572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 575            plt.draw()
 576
 577    def plot_history(self, expand=True):
 578        """Plot derived Monte Carlo history for each ensemble
 579
 580        Parameters
 581        ----------
 582        expand : bool
 583            show expanded history for irregular Monte Carlo chains (default: True).
 584        """
 585        for e, e_name in enumerate(self.mc_names):
 586            plt.figure()
 587            r_length = []
 588            tmp = []
 589            tmp_expanded = []
 590            for r, r_name in enumerate(self.e_content[e_name]):
 591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 592                if expand:
 593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 594                    r_length.append(len(tmp_expanded[-1]))
 595                else:
 596                    r_length.append(len(tmp[-1]))
 597            e_N = np.sum(r_length)
 598            x = np.arange(e_N)
 599            y_test = np.concatenate(tmp, axis=0)
 600            if expand:
 601                y = np.concatenate(tmp_expanded, axis=0)
 602            else:
 603                y = y_test
 604            plt.errorbar(x, y, fmt='.', markersize=3)
 605            plt.xlim(-0.5, e_N - 0.5)
 606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 607            plt.draw()
 608
 609    def plot_piechart(self, save=None):
 610        """Plot piechart which shows the fractional contribution of each
 611        ensemble to the error and returns a dictionary containing the fractions.
 612
 613        Parameters
 614        ----------
 615        save : str
 616            saves the figure to a file named 'save' if.
 617        """
 618        if not hasattr(self, 'e_dvalue'):
 619            raise Exception('Run the gamma method first.')
 620        if np.isclose(0.0, self._dvalue, atol=1e-15):
 621            raise Exception('Error is 0.0')
 622        labels = self.e_names
 623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 624        fig1, ax1 = plt.subplots()
 625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 626        ax1.axis('equal')
 627        plt.draw()
 628        if save:
 629            fig1.savefig(save)
 630
 631        return dict(zip(labels, sizes))
 632
 633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 634        """Dump the Obs to a file 'name' of chosen format.
 635
 636        Parameters
 637        ----------
 638        filename : str
 639            name of the file to be saved.
 640        datatype : str
 641            Format of the exported file. Supported formats include
 642            "json.gz" and "pickle"
 643        description : str
 644            Description for output file, only relevant for json.gz format.
 645        path : str
 646            specifies a custom path for the file (default '.')
 647        """
 648        if 'path' in kwargs:
 649            file_name = kwargs.get('path') + '/' + filename
 650        else:
 651            file_name = filename
 652
 653        if datatype == "json.gz":
 654            from .input.json import dump_to_json
 655            dump_to_json([self], file_name, description=description)
 656        elif datatype == "pickle":
 657            with open(file_name + '.p', 'wb') as fb:
 658                pickle.dump(self, fb)
 659        else:
 660            raise Exception("Unknown datatype " + str(datatype))
 661
 662    def export_jackknife(self):
 663        """Export jackknife samples from the Obs
 664
 665        Returns
 666        -------
 667        numpy.ndarray
 668            Returns a numpy array of length N + 1 where N is the number of samples
 669            for the given ensemble and replicum. The zeroth entry of the array contains
 670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 671            derived from the Obs. The current implementation only works for observables
 672            defined on exactly one ensemble and replicum. The derived jackknife samples
 673            should agree with samples from a full jackknife analysis up to O(1/N).
 674        """
 675
 676        if len(self.names) != 1:
 677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 678
 679        name = self.names[0]
 680        full_data = self.deltas[name] + self.r_values[name]
 681        n = full_data.size
 682        mean = self.value
 683        tmp_jacks = np.zeros(n + 1)
 684        tmp_jacks[0] = mean
 685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 686        return tmp_jacks
 687
 688    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 689        """Export bootstrap samples from the Obs
 690
 691        Parameters
 692        ----------
 693        samples : int
 694            Number of bootstrap samples to generate.
 695        random_numbers : np.ndarray
 696            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 697            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 698        save_rng : str
 699            Save the random numbers to a file if a path is specified.
 700
 701        Returns
 702        -------
 703        numpy.ndarray
 704            Returns a numpy array of length N + 1 where N is the number of samples
 705            for the given ensemble and replicum. The zeroth entry of the array contains
 706            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 707            derived from the Obs. The current implementation only works for observables
 708            defined on exactly one ensemble and replicum. The derived bootstrap samples
 709            should agree with samples from a full bootstrap analysis up to O(1/N).
 710        """
 711        if len(self.names) != 1:
 712            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 713
 714        name = self.names[0]
 715        length = self.N
 716
 717        if random_numbers is None:
 718            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 719            rng = np.random.default_rng(seed)
 720            random_numbers = rng.integers(0, length, size=(samples, length))
 721
 722        if save_rng is not None:
 723            np.savetxt(save_rng, random_numbers, fmt='%i')
 724
 725        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 726        ret = np.zeros(samples + 1)
 727        ret[0] = self.value
 728        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 729        return ret
 730
 731    def __float__(self):
 732        return float(self.value)
 733
 734    def __repr__(self):
 735        return 'Obs[' + str(self) + ']'
 736
 737    def __str__(self):
 738        return _format_uncertainty(self.value, self._dvalue)
 739
 740    def __format__(self, format_type):
 741        if format_type == "":
 742            significance = 2
 743        else:
 744            significance = int(float(format_type.replace("+", "").replace("-", "")))
 745        my_str = _format_uncertainty(self.value, self._dvalue,
 746                                     significance=significance)
 747        for char in ["+", " "]:
 748            if format_type.startswith(char):
 749                if my_str[0] != "-":
 750                    my_str = char + my_str
 751        return my_str
 752
 753    def __hash__(self):
 754        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 755        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 756        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 757        hash_tuple += tuple([o.encode() for o in self.names])
 758        m = hashlib.md5()
 759        [m.update(o) for o in hash_tuple]
 760        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 761
 762    # Overload comparisons
 763    def __lt__(self, other):
 764        return self.value < other
 765
 766    def __le__(self, other):
 767        return self.value <= other
 768
 769    def __gt__(self, other):
 770        return self.value > other
 771
 772    def __ge__(self, other):
 773        return self.value >= other
 774
 775    def __eq__(self, other):
 776        if other is None:
 777            return False
 778        return (self - other).is_zero()
 779
 780    # Overload math operations
 781    def __add__(self, y):
 782        if isinstance(y, Obs):
 783            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 784        else:
 785            if isinstance(y, np.ndarray):
 786                return np.array([self + o for o in y])
 787            elif y.__class__.__name__ in ['Corr', 'CObs']:
 788                return NotImplemented
 789            else:
 790                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 791
 792    def __radd__(self, y):
 793        return self + y
 794
 795    def __mul__(self, y):
 796        if isinstance(y, Obs):
 797            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 798        else:
 799            if isinstance(y, np.ndarray):
 800                return np.array([self * o for o in y])
 801            elif isinstance(y, complex):
 802                return CObs(self * y.real, self * y.imag)
 803            elif y.__class__.__name__ in ['Corr', 'CObs']:
 804                return NotImplemented
 805            else:
 806                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 807
 808    def __rmul__(self, y):
 809        return self * y
 810
 811    def __sub__(self, y):
 812        if isinstance(y, Obs):
 813            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 814        else:
 815            if isinstance(y, np.ndarray):
 816                return np.array([self - o for o in y])
 817            elif y.__class__.__name__ in ['Corr', 'CObs']:
 818                return NotImplemented
 819            else:
 820                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 821
 822    def __rsub__(self, y):
 823        return -1 * (self - y)
 824
 825    def __pos__(self):
 826        return self
 827
 828    def __neg__(self):
 829        return -1 * self
 830
 831    def __truediv__(self, y):
 832        if isinstance(y, Obs):
 833            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 834        else:
 835            if isinstance(y, np.ndarray):
 836                return np.array([self / o for o in y])
 837            elif y.__class__.__name__ in ['Corr', 'CObs']:
 838                return NotImplemented
 839            else:
 840                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 841
 842    def __rtruediv__(self, y):
 843        if isinstance(y, Obs):
 844            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 845        else:
 846            if isinstance(y, np.ndarray):
 847                return np.array([o / self for o in y])
 848            elif y.__class__.__name__ in ['Corr', 'CObs']:
 849                return NotImplemented
 850            else:
 851                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 852
 853    def __pow__(self, y):
 854        if isinstance(y, Obs):
 855            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 856        else:
 857            return derived_observable(lambda x: x[0] ** y, [self])
 858
 859    def __rpow__(self, y):
 860        if isinstance(y, Obs):
 861            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 862        else:
 863            return derived_observable(lambda x: y ** x[0], [self])
 864
 865    def __abs__(self):
 866        return derived_observable(lambda x: anp.abs(x[0]), [self])
 867
 868    # Overload numpy functions
 869    def sqrt(self):
 870        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 871
 872    def log(self):
 873        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 874
 875    def exp(self):
 876        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 877
 878    def sin(self):
 879        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 880
 881    def cos(self):
 882        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 883
 884    def tan(self):
 885        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 886
 887    def arcsin(self):
 888        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 889
 890    def arccos(self):
 891        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 892
 893    def arctan(self):
 894        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 895
 896    def sinh(self):
 897        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 898
 899    def cosh(self):
 900        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 901
 902    def tanh(self):
 903        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 904
 905    def arcsinh(self):
 906        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 907
 908    def arccosh(self):
 909        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 910
 911    def arctanh(self):
 912        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 913
 914
 915class CObs:
 916    """Class for a complex valued observable."""
 917    __slots__ = ['_real', '_imag', 'tag']
 918
 919    def __init__(self, real, imag=0.0):
 920        self._real = real
 921        self._imag = imag
 922        self.tag = None
 923
 924    @property
 925    def real(self):
 926        return self._real
 927
 928    @property
 929    def imag(self):
 930        return self._imag
 931
 932    def gamma_method(self, **kwargs):
 933        """Executes the gamma_method for the real and the imaginary part."""
 934        if isinstance(self.real, Obs):
 935            self.real.gamma_method(**kwargs)
 936        if isinstance(self.imag, Obs):
 937            self.imag.gamma_method(**kwargs)
 938
 939    def is_zero(self):
 940        """Checks whether both real and imaginary part are zero within machine precision."""
 941        return self.real == 0.0 and self.imag == 0.0
 942
 943    def conjugate(self):
 944        return CObs(self.real, -self.imag)
 945
 946    def __add__(self, other):
 947        if isinstance(other, np.ndarray):
 948            return other + self
 949        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 950            return CObs(self.real + other.real,
 951                        self.imag + other.imag)
 952        else:
 953            return CObs(self.real + other, self.imag)
 954
 955    def __radd__(self, y):
 956        return self + y
 957
 958    def __sub__(self, other):
 959        if isinstance(other, np.ndarray):
 960            return -1 * (other - self)
 961        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 962            return CObs(self.real - other.real, self.imag - other.imag)
 963        else:
 964            return CObs(self.real - other, self.imag)
 965
 966    def __rsub__(self, other):
 967        return -1 * (self - other)
 968
 969    def __mul__(self, other):
 970        if isinstance(other, np.ndarray):
 971            return other * self
 972        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 973            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 974                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 975                                               [self.real, other.real, self.imag, other.imag],
 976                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 977                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 978                                               [self.real, other.real, self.imag, other.imag],
 979                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 980            elif getattr(other, 'imag', 0) != 0:
 981                return CObs(self.real * other.real - self.imag * other.imag,
 982                            self.imag * other.real + self.real * other.imag)
 983            else:
 984                return CObs(self.real * other.real, self.imag * other.real)
 985        else:
 986            return CObs(self.real * other, self.imag * other)
 987
 988    def __rmul__(self, other):
 989        return self * other
 990
 991    def __truediv__(self, other):
 992        if isinstance(other, np.ndarray):
 993            return 1 / (other / self)
 994        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 995            r = other.real ** 2 + other.imag ** 2
 996            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 997        else:
 998            return CObs(self.real / other, self.imag / other)
 999
1000    def __rtruediv__(self, other):
1001        r = self.real ** 2 + self.imag ** 2
1002        if hasattr(other, 'real') and hasattr(other, 'imag'):
1003            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1004        else:
1005            return CObs(self.real * other / r, -self.imag * other / r)
1006
1007    def __abs__(self):
1008        return np.sqrt(self.real**2 + self.imag**2)
1009
1010    def __pos__(self):
1011        return self
1012
1013    def __neg__(self):
1014        return -1 * self
1015
1016    def __eq__(self, other):
1017        return self.real == other.real and self.imag == other.imag
1018
1019    def __str__(self):
1020        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1021
1022    def __repr__(self):
1023        return 'CObs[' + str(self) + ']'
1024
1025    def __format__(self, format_type):
1026        if format_type == "":
1027            significance = 2
1028            format_type = "2"
1029        else:
1030            significance = int(float(format_type.replace("+", "").replace("-", "")))
1031        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1032
1033
1034def gamma_method(x, **kwargs):
1035    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1036
1037    See docstring of pe.Obs.gamma_method for details.
1038    """
1039    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1040
1041
1042gm = gamma_method
1043
1044
1045def _format_uncertainty(value, dvalue, significance=2):
1046    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1047    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1048        return str(value)
1049    if not isinstance(significance, int):
1050        raise TypeError("significance needs to be an integer.")
1051    if significance < 1:
1052        raise ValueError("significance needs to be larger than zero.")
1053    fexp = np.floor(np.log10(dvalue))
1054    if fexp < 0.0:
1055        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1056    elif fexp == 0.0:
1057        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1058    else:
1059        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1060
1061
1062def _expand_deltas(deltas, idx, shape, gapsize):
1063    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1064       configurations and where holes are filled by 0.
1065       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1066
1067    Parameters
1068    ----------
1069    deltas : list
1070        List of fluctuations
1071    idx : list
1072        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1073    shape : int
1074        Number of configs in idx.
1075    gapsize : int
1076        The target distance between two configurations. If longer distances
1077        are found in idx, the data is expanded.
1078    """
1079    if isinstance(idx, range):
1080        if (idx.step == gapsize):
1081            return deltas
1082    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1083    for i in range(shape):
1084        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1085    return ret
1086
1087
1088def _merge_idx(idl):
1089    """Returns the union of all lists in idl as range or sorted list
1090
1091    Parameters
1092    ----------
1093    idl : list
1094        List of lists or ranges.
1095    """
1096
1097    if _check_lists_equal(idl):
1098        return idl[0]
1099
1100    idunion = sorted(set().union(*idl))
1101
1102    # Check whether idunion can be expressed as range
1103    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1104    idtest = [list(idrange), idunion]
1105    if _check_lists_equal(idtest):
1106        return idrange
1107
1108    return idunion
1109
1110
1111def _intersection_idx(idl):
1112    """Returns the intersection of all lists in idl as range or sorted list
1113
1114    Parameters
1115    ----------
1116    idl : list
1117        List of lists or ranges.
1118    """
1119
1120    if _check_lists_equal(idl):
1121        return idl[0]
1122
1123    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1124
1125    # Check whether idinter can be expressed as range
1126    try:
1127        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1128        idtest = [list(idrange), idinter]
1129        if _check_lists_equal(idtest):
1130            return idrange
1131    except IndexError:
1132        pass
1133
1134    return idinter
1135
1136
1137def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1138    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1139       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1140       common divisor of the step sizes is used as new step size.
1141
1142    Parameters
1143    ----------
1144    deltas : list
1145        List of fluctuations
1146    idx : list
1147        List or range of configs on which the deltas are defined.
1148        Has to be a subset of new_idx and has to be sorted in ascending order.
1149    shape : list
1150        Number of configs in idx.
1151    new_idx : list
1152        List of configs that defines the new range, has to be sorted in ascending order.
1153    """
1154
1155    if type(idx) is range and type(new_idx) is range:
1156        if idx == new_idx:
1157            return deltas
1158    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1159    for i in range(shape):
1160        ret[idx[i] - new_idx[0]] = deltas[i]
1161    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1162
1163
1164def derived_observable(func, data, array_mode=False, **kwargs):
1165    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1166
1167    Parameters
1168    ----------
1169    func : object
1170        arbitrary function of the form func(data, **kwargs). For the
1171        automatic differentiation to work, all numpy functions have to have
1172        the autograd wrapper (use 'import autograd.numpy as anp').
1173    data : list
1174        list of Obs, e.g. [obs1, obs2, obs3].
1175    num_grad : bool
1176        if True, numerical derivatives are used instead of autograd
1177        (default False). To control the numerical differentiation the
1178        kwargs of numdifftools.step_generators.MaxStepGenerator
1179        can be used.
1180    man_grad : list
1181        manually supply a list or an array which contains the jacobian
1182        of func. Use cautiously, supplying the wrong derivative will
1183        not be intercepted.
1184
1185    Notes
1186    -----
1187    For simple mathematical operations it can be practical to use anonymous
1188    functions. For the ratio of two observables one can e.g. use
1189
1190    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1191    """
1192
1193    data = np.asarray(data)
1194    raveled_data = data.ravel()
1195
1196    # Workaround for matrix operations containing non Obs data
1197    if not all(isinstance(x, Obs) for x in raveled_data):
1198        for i in range(len(raveled_data)):
1199            if isinstance(raveled_data[i], (int, float)):
1200                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1201
1202    allcov = {}
1203    for o in raveled_data:
1204        for name in o.cov_names:
1205            if name in allcov:
1206                if not np.allclose(allcov[name], o.covobs[name].cov):
1207                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1208            else:
1209                allcov[name] = o.covobs[name].cov
1210
1211    n_obs = len(raveled_data)
1212    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1213    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1214    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1215
1216    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1217
1218    if data.ndim == 1:
1219        values = np.array([o.value for o in data])
1220    else:
1221        values = np.vectorize(lambda x: x.value)(data)
1222
1223    new_values = func(values, **kwargs)
1224
1225    multi = int(isinstance(new_values, np.ndarray))
1226
1227    new_r_values = {}
1228    new_idl_d = {}
1229    for name in new_sample_names:
1230        idl = []
1231        tmp_values = np.zeros(n_obs)
1232        for i, item in enumerate(raveled_data):
1233            tmp_values[i] = item.r_values.get(name, item.value)
1234            tmp_idl = item.idl.get(name)
1235            if tmp_idl is not None:
1236                idl.append(tmp_idl)
1237        if multi > 0:
1238            tmp_values = np.array(tmp_values).reshape(data.shape)
1239        new_r_values[name] = func(tmp_values, **kwargs)
1240        new_idl_d[name] = _merge_idx(idl)
1241
1242    if 'man_grad' in kwargs:
1243        deriv = np.asarray(kwargs.get('man_grad'))
1244        if new_values.shape + data.shape != deriv.shape:
1245            raise Exception('Manual derivative does not have correct shape.')
1246    elif kwargs.get('num_grad') is True:
1247        if multi > 0:
1248            raise Exception('Multi mode currently not supported for numerical derivative')
1249        options = {
1250            'base_step': 0.1,
1251            'step_ratio': 2.5}
1252        for key in options.keys():
1253            kwarg = kwargs.get(key)
1254            if kwarg is not None:
1255                options[key] = kwarg
1256        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1257        if tmp_df.size == 1:
1258            deriv = np.array([tmp_df.real])
1259        else:
1260            deriv = tmp_df.real
1261    else:
1262        deriv = jacobian(func)(values, **kwargs)
1263
1264    final_result = np.zeros(new_values.shape, dtype=object)
1265
1266    if array_mode is True:
1267
1268        class _Zero_grad():
1269            def __init__(self, N):
1270                self.grad = np.zeros((N, 1))
1271
1272        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1273        d_extracted = {}
1274        g_extracted = {}
1275        for name in new_sample_names:
1276            d_extracted[name] = []
1277            ens_length = len(new_idl_d[name])
1278            for i_dat, dat in enumerate(data):
1279                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1280        for name in new_cov_names:
1281            g_extracted[name] = []
1282            zero_grad = _Zero_grad(new_covobs_lengths[name])
1283            for i_dat, dat in enumerate(data):
1284                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1285
1286    for i_val, new_val in np.ndenumerate(new_values):
1287        new_deltas = {}
1288        new_grad = {}
1289        if array_mode is True:
1290            for name in new_sample_names:
1291                ens_length = d_extracted[name][0].shape[-1]
1292                new_deltas[name] = np.zeros(ens_length)
1293                for i_dat, dat in enumerate(d_extracted[name]):
1294                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1295            for name in new_cov_names:
1296                new_grad[name] = 0
1297                for i_dat, dat in enumerate(g_extracted[name]):
1298                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1299        else:
1300            for j_obs, obs in np.ndenumerate(data):
1301                for name in obs.names:
1302                    if name in obs.cov_names:
1303                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1304                    else:
1305                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1306
1307        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1308
1309        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1310            raise Exception('The same name has been used for deltas and covobs!')
1311        new_samples = []
1312        new_means = []
1313        new_idl = []
1314        new_names_obs = []
1315        for name in new_names:
1316            if name not in new_covobs:
1317                new_samples.append(new_deltas[name])
1318                new_idl.append(new_idl_d[name])
1319                new_means.append(new_r_values[name][i_val])
1320                new_names_obs.append(name)
1321        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1322        for name in new_covobs:
1323            final_result[i_val].names.append(name)
1324        final_result[i_val]._covobs = new_covobs
1325        final_result[i_val]._value = new_val
1326        final_result[i_val].reweighted = reweighted
1327
1328    if multi == 0:
1329        final_result = final_result.item()
1330
1331    return final_result
1332
1333
1334def _reduce_deltas(deltas, idx_old, idx_new):
1335    """Extract deltas defined on idx_old on all configs of idx_new.
1336
1337    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1338    are ordered in an ascending order.
1339
1340    Parameters
1341    ----------
1342    deltas : list
1343        List of fluctuations
1344    idx_old : list
1345        List or range of configs on which the deltas are defined
1346    idx_new : list
1347        List of configs for which we want to extract the deltas.
1348        Has to be a subset of idx_old.
1349    """
1350    if not len(deltas) == len(idx_old):
1351        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1352    if type(idx_old) is range and type(idx_new) is range:
1353        if idx_old == idx_new:
1354            return deltas
1355    if _check_lists_equal([idx_old, idx_new]):
1356        return deltas
1357    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1358    if len(indices) < len(idx_new):
1359        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1360    return np.array(deltas)[indices]
1361
1362
1363def reweight(weight, obs, **kwargs):
1364    """Reweight a list of observables.
1365
1366    Parameters
1367    ----------
1368    weight : Obs
1369        Reweighting factor. An Observable that has to be defined on a superset of the
1370        configurations in obs[i].idl for all i.
1371    obs : list
1372        list of Obs, e.g. [obs1, obs2, obs3].
1373    all_configs : bool
1374        if True, the reweighted observables are normalized by the average of
1375        the reweighting factor on all configurations in weight.idl and not
1376        on the configurations in obs[i].idl. Default False.
1377    """
1378    result = []
1379    for i in range(len(obs)):
1380        if len(obs[i].cov_names):
1381            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1382        if not set(obs[i].names).issubset(weight.names):
1383            raise Exception('Error: Ensembles do not fit')
1384        for name in obs[i].names:
1385            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1386                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1387        new_samples = []
1388        w_deltas = {}
1389        for name in sorted(obs[i].names):
1390            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1391            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1392        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1393
1394        if kwargs.get('all_configs'):
1395            new_weight = weight
1396        else:
1397            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1398
1399        result.append(tmp_obs / new_weight)
1400        result[-1].reweighted = True
1401
1402    return result
1403
1404
1405def correlate(obs_a, obs_b):
1406    """Correlate two observables.
1407
1408    Parameters
1409    ----------
1410    obs_a : Obs
1411        First observable
1412    obs_b : Obs
1413        Second observable
1414
1415    Notes
1416    -----
1417    Keep in mind to only correlate primary observables which have not been reweighted
1418    yet. The reweighting has to be applied after correlating the observables.
1419    Currently only works if ensembles are identical (this is not strictly necessary).
1420    """
1421
1422    if sorted(obs_a.names) != sorted(obs_b.names):
1423        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1424    if len(obs_a.cov_names) or len(obs_b.cov_names):
1425        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1426    for name in obs_a.names:
1427        if obs_a.shape[name] != obs_b.shape[name]:
1428            raise Exception('Shapes of ensemble', name, 'do not fit')
1429        if obs_a.idl[name] != obs_b.idl[name]:
1430            raise Exception('idl of ensemble', name, 'do not fit')
1431
1432    if obs_a.reweighted is True:
1433        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1434    if obs_b.reweighted is True:
1435        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1436
1437    new_samples = []
1438    new_idl = []
1439    for name in sorted(obs_a.names):
1440        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1441        new_idl.append(obs_a.idl[name])
1442
1443    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1444    o.reweighted = obs_a.reweighted or obs_b.reweighted
1445    return o
1446
1447
1448def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1449    r'''Calculates the error covariance matrix of a set of observables.
1450
1451    WARNING: This function should be used with care, especially for observables with support on multiple
1452             ensembles with differing autocorrelations. See the notes below for details.
1453
1454    The gamma method has to be applied first to all observables.
1455
1456    Parameters
1457    ----------
1458    obs : list or numpy.ndarray
1459        List or one dimensional array of Obs
1460    visualize : bool
1461        If True plots the corresponding normalized correlation matrix (default False).
1462    correlation : bool
1463        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1464    smooth : None or int
1465        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1466        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1467        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1468        small ones.
1469
1470    Notes
1471    -----
1472    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1473    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1474    in the absence of autocorrelation.
1475    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1476    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1477    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1478    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1479    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1480    '''
1481
1482    length = len(obs)
1483
1484    max_samples = np.max([o.N for o in obs])
1485    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1486        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1487
1488    cov = np.zeros((length, length))
1489    for i in range(length):
1490        for j in range(i, length):
1491            cov[i, j] = _covariance_element(obs[i], obs[j])
1492    cov = cov + cov.T - np.diag(np.diag(cov))
1493
1494    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1495
1496    if isinstance(smooth, int):
1497        corr = _smooth_eigenvalues(corr, smooth)
1498
1499    if visualize:
1500        plt.matshow(corr, vmin=-1, vmax=1)
1501        plt.set_cmap('RdBu')
1502        plt.colorbar()
1503        plt.draw()
1504
1505    if correlation is True:
1506        return corr
1507
1508    errors = [o.dvalue for o in obs]
1509    cov = np.diag(errors) @ corr @ np.diag(errors)
1510
1511    eigenvalues = np.linalg.eigh(cov)[0]
1512    if not np.all(eigenvalues >= 0):
1513        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1514
1515    return cov
1516
1517
1518def _smooth_eigenvalues(corr, E):
1519    """Eigenvalue smoothing as described in hep-lat/9412087
1520
1521    corr : np.ndarray
1522        correlation matrix
1523    E : integer
1524        Number of eigenvalues to be left substantially unchanged
1525    """
1526    if not (2 < E < corr.shape[0] - 1):
1527        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1528    vals, vec = np.linalg.eigh(corr)
1529    lambda_min = np.mean(vals[:-E])
1530    vals[vals < lambda_min] = lambda_min
1531    vals /= np.mean(vals)
1532    return vec @ np.diag(vals) @ vec.T
1533
1534
1535def _covariance_element(obs1, obs2):
1536    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1537
1538    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1539        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1540        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1541        return np.sum(deltas1 * deltas2)
1542
1543    if set(obs1.names).isdisjoint(set(obs2.names)):
1544        return 0.0
1545
1546    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1547        raise Exception('The gamma method has to be applied to both Obs first.')
1548
1549    dvalue = 0.0
1550
1551    for e_name in obs1.mc_names:
1552
1553        if e_name not in obs2.mc_names:
1554            continue
1555
1556        idl_d = {}
1557        for r_name in obs1.e_content[e_name]:
1558            if r_name not in obs2.e_content[e_name]:
1559                continue
1560            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1561
1562        gamma = 0.0
1563
1564        for r_name in obs1.e_content[e_name]:
1565            if r_name not in obs2.e_content[e_name]:
1566                continue
1567            if len(idl_d[r_name]) == 0:
1568                continue
1569            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1570
1571        if gamma == 0.0:
1572            continue
1573
1574        gamma_div = 0.0
1575        for r_name in obs1.e_content[e_name]:
1576            if r_name not in obs2.e_content[e_name]:
1577                continue
1578            if len(idl_d[r_name]) == 0:
1579                continue
1580            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1581        gamma /= gamma_div
1582
1583        dvalue += gamma
1584
1585    for e_name in obs1.cov_names:
1586
1587        if e_name not in obs2.cov_names:
1588            continue
1589
1590        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1591
1592    return dvalue
1593
1594
1595def import_jackknife(jacks, name, idl=None):
1596    """Imports jackknife samples and returns an Obs
1597
1598    Parameters
1599    ----------
1600    jacks : numpy.ndarray
1601        numpy array containing the mean value as zeroth entry and
1602        the N jackknife samples as first to Nth entry.
1603    name : str
1604        name of the ensemble the samples are defined on.
1605    """
1606    length = len(jacks) - 1
1607    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1608    samples = jacks[1:] @ prj
1609    mean = np.mean(samples)
1610    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1611    new_obs._value = jacks[0]
1612    return new_obs
1613
1614
1615def import_bootstrap(boots, name, random_numbers):
1616    """Imports bootstrap samples and returns an Obs
1617
1618    Parameters
1619    ----------
1620    boots : numpy.ndarray
1621        numpy array containing the mean value as zeroth entry and
1622        the N bootstrap samples as first to Nth entry.
1623    name : str
1624        name of the ensemble the samples are defined on.
1625    random_numbers : np.ndarray
1626        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1627        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1628        chain to be reconstructed.
1629    """
1630    samples, length = random_numbers.shape
1631    if samples != len(boots) - 1:
1632        raise ValueError("Random numbers do not have the correct shape.")
1633
1634    if samples < length:
1635        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1636
1637    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1638
1639    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1640    ret = Obs([samples], [name])
1641    ret._value = boots[0]
1642    return ret
1643
1644
1645def merge_obs(list_of_obs):
1646    """Combine all observables in list_of_obs into one new observable
1647
1648    Parameters
1649    ----------
1650    list_of_obs : list
1651        list of the Obs object to be combined
1652
1653    Notes
1654    -----
1655    It is not possible to combine obs which are based on the same replicum
1656    """
1657    replist = [item for obs in list_of_obs for item in obs.names]
1658    if (len(replist) == len(set(replist))) is False:
1659        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1660    if any([len(o.cov_names) for o in list_of_obs]):
1661        raise Exception('Not possible to merge data that contains covobs!')
1662    new_dict = {}
1663    idl_dict = {}
1664    for o in list_of_obs:
1665        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1666                        for key in set(o.deltas) | set(o.r_values)})
1667        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1668
1669    names = sorted(new_dict.keys())
1670    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1671    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1672    return o
1673
1674
1675def cov_Obs(means, cov, name, grad=None):
1676    """Create an Obs based on mean(s) and a covariance matrix
1677
1678    Parameters
1679    ----------
1680    mean : list of floats or float
1681        N mean value(s) of the new Obs
1682    cov : list or array
1683        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1684    name : str
1685        identifier for the covariance matrix
1686    grad : list or array
1687        Gradient of the Covobs wrt. the means belonging to cov.
1688    """
1689
1690    def covobs_to_obs(co):
1691        """Make an Obs out of a Covobs
1692
1693        Parameters
1694        ----------
1695        co : Covobs
1696            Covobs to be embedded into the Obs
1697        """
1698        o = Obs([], [], means=[])
1699        o._value = co.value
1700        o.names.append(co.name)
1701        o._covobs[co.name] = co
1702        o._dvalue = np.sqrt(co.errsq())
1703        return o
1704
1705    ol = []
1706    if isinstance(means, (float, int)):
1707        means = [means]
1708
1709    for i in range(len(means)):
1710        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1711    if ol[0].covobs[name].N != len(means):
1712        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1713    if len(ol) == 1:
1714        return ol[0]
1715    return ol
1716
1717
1718def _determine_gap(o, e_content, e_name):
1719    gaps = []
1720    for r_name in e_content[e_name]:
1721        if isinstance(o.idl[r_name], range):
1722            gaps.append(o.idl[r_name].step)
1723        else:
1724            gaps.append(np.min(np.diff(o.idl[r_name])))
1725
1726    gap = min(gaps)
1727    if not np.all([gi % gap == 0 for gi in gaps]):
1728        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1729
1730    return gap
1731
1732
1733def _check_lists_equal(idl):
1734    '''
1735    Use groupby to efficiently check whether all elements of idl are identical.
1736    Returns True if all elements are equal, otherwise False.
1737
1738    Parameters
1739    ----------
1740    idl : list of lists, ranges or np.ndarrays
1741    '''
1742    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1743    if next(g, True) and not next(g, False):
1744        return True
1745    return False
class Obs:
 19class Obs:
 20    """Class for a general observable.
 21
 22    Instances of Obs are the basic objects of a pyerrors error analysis.
 23    They are initialized with a list which contains arrays of samples for
 24    different ensembles/replica and another list of same length which contains
 25    the names of the ensembles/replica. Mathematical operations can be
 26    performed on instances. The result is another instance of Obs. The error of
 27    an instance can be computed with the gamma_method. Also contains additional
 28    methods for output and visualization of the error calculation.
 29
 30    Attributes
 31    ----------
 32    S_global : float
 33        Standard value for S (default 2.0)
 34    S_dict : dict
 35        Dictionary for S values. If an entry for a given ensemble
 36        exists this overwrites the standard value for that ensemble.
 37    tau_exp_global : float
 38        Standard value for tau_exp (default 0.0)
 39    tau_exp_dict : dict
 40        Dictionary for tau_exp values. If an entry for a given ensemble exists
 41        this overwrites the standard value for that ensemble.
 42    N_sigma_global : float
 43        Standard value for N_sigma (default 1.0)
 44    N_sigma_dict : dict
 45        Dictionary for N_sigma values. If an entry for a given ensemble exists
 46        this overwrites the standard value for that ensemble.
 47    """
 48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 52                 'idl', 'tag', '_covobs', '__dict__']
 53
 54    S_global = 2.0
 55    S_dict = {}
 56    tau_exp_global = 0.0
 57    tau_exp_dict = {}
 58    N_sigma_global = 1.0
 59    N_sigma_dict = {}
 60
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86            else:
 87                if not isinstance(names[0], str):
 88                    raise TypeError('All names have to be strings.')
 89            if min(len(x) for x in samples) <= 4:
 90                raise ValueError('Samples have to have at least 5 entries.')
 91
 92        self.names = sorted(names)
 93        self.shape = {}
 94        self.r_values = {}
 95        self.deltas = {}
 96        self._covobs = {}
 97
 98        self._value = 0
 99        self.N = 0
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise ValueError("Unsorted idx for idl[%s]" % (name))
109                    if len(dc) == 1:
110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
111                    else:
112                        self.idl[name] = list(idx)
113                else:
114                    raise TypeError('incompatible type for idl[%s].' % (name))
115        else:
116            for name, sample in sorted(zip(names, samples)):
117                self.idl[name] = range(1, len(sample) + 1)
118
119        if kwargs.get("means") is not None:
120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
121                self.shape[name] = len(self.idl[name])
122                self.N += self.shape[name]
123                self.r_values[name] = mean
124                self.deltas[name] = sample
125        else:
126            for name, sample in sorted(zip(names, samples)):
127                self.shape[name] = len(self.idl[name])
128                self.N += self.shape[name]
129                if len(sample) != self.shape[name]:
130                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
131                self.r_values[name] = np.mean(sample)
132                self.deltas[name] = sample - self.r_values[name]
133                self._value += self.shape[name] * self.r_values[name]
134            self._value /= self.N
135
136        self._dvalue = 0.0
137        self.ddvalue = 0.0
138        self.reweighted = False
139
140        self.tag = None
141
142    @property
143    def value(self):
144        return self._value
145
146    @property
147    def dvalue(self):
148        return self._dvalue
149
150    @property
151    def e_names(self):
152        return sorted(set([o.split('|')[0] for o in self.names]))
153
154    @property
155    def cov_names(self):
156        return sorted(set([o for o in self.covobs.keys()]))
157
158    @property
159    def mc_names(self):
160        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
161
162    @property
163    def e_content(self):
164        res = {}
165        for e, e_name in enumerate(self.e_names):
166            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
167            if e_name in self.names:
168                res[e_name].append(e_name)
169        return res
170
171    @property
172    def covobs(self):
173        return self._covobs
174
175    def gamma_method(self, **kwargs):
176        """Estimate the error and related properties of the Obs.
177
178        Parameters
179        ----------
180        S : float
181            specifies a custom value for the parameter S (default 2.0).
182            If set to 0 it is assumed that the data exhibits no
183            autocorrelation. In this case the error estimates coincides
184            with the sample standard error.
185        tau_exp : float
186            positive value triggers the critical slowing down analysis
187            (default 0.0).
188        N_sigma : float
189            number of standard deviations from zero until the tail is
190            attached to the autocorrelation function (default 1).
191        fft : bool
192            determines whether the fft algorithm is used for the computation
193            of the autocorrelation function (default True)
194        """
195
196        e_content = self.e_content
197        self.e_dvalue = {}
198        self.e_ddvalue = {}
199        self.e_tauint = {}
200        self.e_dtauint = {}
201        self.e_windowsize = {}
202        self.e_n_tauint = {}
203        self.e_n_dtauint = {}
204        e_gamma = {}
205        self.e_rho = {}
206        self.e_drho = {}
207        self._dvalue = 0
208        self.ddvalue = 0
209
210        self.S = {}
211        self.tau_exp = {}
212        self.N_sigma = {}
213
214        if kwargs.get('fft') is False:
215            fft = False
216        else:
217            fft = True
218
219        def _parse_kwarg(kwarg_name):
220            if kwarg_name in kwargs:
221                tmp = kwargs.get(kwarg_name)
222                if isinstance(tmp, (int, float)):
223                    if tmp < 0:
224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
225                    for e, e_name in enumerate(self.e_names):
226                        getattr(self, kwarg_name)[e_name] = tmp
227                else:
228                    raise TypeError(kwarg_name + ' is not in proper format.')
229            else:
230                for e, e_name in enumerate(self.e_names):
231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
233                    else:
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
235
236        _parse_kwarg('S')
237        _parse_kwarg('tau_exp')
238        _parse_kwarg('N_sigma')
239
240        for e, e_name in enumerate(self.mc_names):
241            gapsize = _determine_gap(self, e_content, e_name)
242
243            r_length = []
244            for r_name in e_content[e_name]:
245                if isinstance(self.idl[r_name], range):
246                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
247                else:
248                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
249
250            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
251            w_max = max(r_length) // 2
252            e_gamma[e_name] = np.zeros(w_max)
253            self.e_rho[e_name] = np.zeros(w_max)
254            self.e_drho[e_name] = np.zeros(w_max)
255
256            for r_name in e_content[e_name]:
257                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
258
259            gamma_div = np.zeros(w_max)
260            for r_name in e_content[e_name]:
261                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262            gamma_div[gamma_div < 1] = 1.0
263            e_gamma[e_name] /= gamma_div[:w_max]
264
265            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
266                self.e_tauint[e_name] = 0.5
267                self.e_dtauint[e_name] = 0.0
268                self.e_dvalue[e_name] = 0.0
269                self.e_ddvalue[e_name] = 0.0
270                self.e_windowsize[e_name] = 0
271                continue
272
273            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
274            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
275            # Make sure no entry of tauint is smaller than 0.5
276            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
277            # hep-lat/0306017 eq. (42)
278            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
279            self.e_n_dtauint[e_name][0] = 0.0
280
281            def _compute_drho(i):
282                tmp = (self.e_rho[e_name][i + 1:w_max]
283                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
284                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
285                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
286                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
287
288            if self.tau_exp[e_name] > 0:
289                _compute_drho(1)
290                texp = self.tau_exp[e_name]
291                # Critical slowing down analysis
292                if w_max // 2 <= 1:
293                    raise Exception("Need at least 8 samples for tau_exp error analysis")
294                for n in range(1, w_max // 2):
295                    _compute_drho(n + 1)
296                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
297                        # Bias correction hep-lat/0306017 eq. (49) included
298                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
299                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
300                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
301                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
302                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
303                        self.e_windowsize[e_name] = n
304                        break
305            else:
306                if self.S[e_name] == 0.0:
307                    self.e_tauint[e_name] = 0.5
308                    self.e_dtauint[e_name] = 0.0
309                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
310                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
311                    self.e_windowsize[e_name] = 0
312                else:
313                    # Standard automatic windowing procedure
314                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
315                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
316                    for n in range(1, w_max):
317                        if g_w[n - 1] < 0 or n >= w_max - 1:
318                            _compute_drho(n)
319                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
320                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
321                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
322                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
323                            self.e_windowsize[e_name] = n
324                            break
325
326            self._dvalue += self.e_dvalue[e_name] ** 2
327            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
328
329        for e_name in self.cov_names:
330            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
331            self.e_ddvalue[e_name] = 0
332            self._dvalue += self.e_dvalue[e_name]**2
333
334        self._dvalue = np.sqrt(self._dvalue)
335        if self._dvalue == 0.0:
336            self.ddvalue = 0.0
337        else:
338            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
339        return
340
341    gm = gamma_method
342
343    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
344        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
345           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
346
347        Parameters
348        ----------
349        deltas : list
350            List of fluctuations
351        idx : list
352            List or range of configurations on which the deltas are defined.
353        shape : int
354            Number of configurations in idx.
355        w_max : int
356            Upper bound for the summation window.
357        fft : bool
358            determines whether the fft algorithm is used for the computation
359            of the autocorrelation function.
360        gapsize : int
361            The target distance between two configurations. If longer distances
362            are found in idx, the data is expanded.
363        """
364        gamma = np.zeros(w_max)
365        deltas = _expand_deltas(deltas, idx, shape, gapsize)
366        new_shape = len(deltas)
367        if fft:
368            max_gamma = min(new_shape, w_max)
369            # The padding for the fft has to be even
370            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
371            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
372        else:
373            for n in range(w_max):
374                if new_shape - n >= 0:
375                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
376
377        return gamma
378
379    def details(self, ens_content=True):
380        """Output detailed properties of the Obs.
381
382        Parameters
383        ----------
384        ens_content : bool
385            print details about the ensembles and replica if true.
386        """
387        if self.tag is not None:
388            print("Description:", self.tag)
389        if not hasattr(self, 'e_dvalue'):
390            print('Result\t %3.8e' % (self.value))
391        else:
392            if self.value == 0.0:
393                percentage = np.nan
394            else:
395                percentage = np.abs(self._dvalue / self.value) * 100
396            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
397            if len(self.e_names) > 1:
398                print(' Ensemble errors:')
399            e_content = self.e_content
400            for e_name in self.mc_names:
401                gap = _determine_gap(self, e_content, e_name)
402
403                if len(self.e_names) > 1:
404                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
405                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
406                tau_string += f" in units of {gap} config"
407                if gap > 1:
408                    tau_string += "s"
409                if self.tau_exp[e_name] > 0:
410                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
411                else:
412                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
413                print(tau_string)
414            for e_name in self.cov_names:
415                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
416        if ens_content is True:
417            if len(self.e_names) == 1:
418                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
419            else:
420                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
421            my_string_list = []
422            for key, value in sorted(self.e_content.items()):
423                if key not in self.covobs:
424                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
425                    if len(value) == 1:
426                        my_string += f': {self.shape[value[0]]} configurations'
427                        if isinstance(self.idl[value[0]], range):
428                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
429                        else:
430                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
431                    else:
432                        sublist = []
433                        for v in value:
434                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
435                            my_substring += f': {self.shape[v]} configurations'
436                            if isinstance(self.idl[v], range):
437                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
438                            else:
439                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
440                            sublist.append(my_substring)
441
442                        my_string += '\n' + '\n'.join(sublist)
443                else:
444                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
445                my_string_list.append(my_string)
446            print('\n'.join(my_string_list))
447
448    def reweight(self, weight):
449        """Reweight the obs with given rewighting factors.
450
451        Parameters
452        ----------
453        weight : Obs
454            Reweighting factor. An Observable that has to be defined on a superset of the
455            configurations in obs[i].idl for all i.
456        all_configs : bool
457            if True, the reweighted observables are normalized by the average of
458            the reweighting factor on all configurations in weight.idl and not
459            on the configurations in obs[i].idl. Default False.
460        """
461        return reweight(weight, [self])[0]
462
463    def is_zero_within_error(self, sigma=1):
464        """Checks whether the observable is zero within 'sigma' standard errors.
465
466        Parameters
467        ----------
468        sigma : int
469            Number of standard errors used for the check.
470
471        Works only properly when the gamma method was run.
472        """
473        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
474
475    def is_zero(self, atol=1e-10):
476        """Checks whether the observable is zero within a given tolerance.
477
478        Parameters
479        ----------
480        atol : float
481            Absolute tolerance (for details see numpy documentation).
482        """
483        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
484
485    def plot_tauint(self, save=None):
486        """Plot integrated autocorrelation time for each ensemble.
487
488        Parameters
489        ----------
490        save : str
491            saves the figure to a file named 'save' if.
492        """
493        if not hasattr(self, 'e_dvalue'):
494            raise Exception('Run the gamma method first.')
495
496        for e, e_name in enumerate(self.mc_names):
497            fig = plt.figure()
498            plt.xlabel(r'$W$')
499            plt.ylabel(r'$\tau_\mathrm{int}$')
500            length = int(len(self.e_n_tauint[e_name]))
501            if self.tau_exp[e_name] > 0:
502                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
503                x_help = np.arange(2 * self.tau_exp[e_name])
504                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
505                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
506                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
507                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
508                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
509                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
510                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
511            else:
512                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
513                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
514
515            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
516            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
517            plt.legend()
518            plt.xlim(-0.5, xmax)
519            ylim = plt.ylim()
520            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
521            plt.draw()
522            if save:
523                fig.savefig(save + "_" + str(e))
524
525    def plot_rho(self, save=None):
526        """Plot normalized autocorrelation function time for each ensemble.
527
528        Parameters
529        ----------
530        save : str
531            saves the figure to a file named 'save' if.
532        """
533        if not hasattr(self, 'e_dvalue'):
534            raise Exception('Run the gamma method first.')
535        for e, e_name in enumerate(self.mc_names):
536            fig = plt.figure()
537            plt.xlabel('W')
538            plt.ylabel('rho')
539            length = int(len(self.e_drho[e_name]))
540            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
541            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
542            if self.tau_exp[e_name] > 0:
543                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
544                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
545                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
546                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
547            else:
548                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
549                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
550            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
551            plt.xlim(-0.5, xmax)
552            plt.draw()
553            if save:
554                fig.savefig(save + "_" + str(e))
555
556    def plot_rep_dist(self):
557        """Plot replica distribution for each ensemble with more than one replicum."""
558        if not hasattr(self, 'e_dvalue'):
559            raise Exception('Run the gamma method first.')
560        for e, e_name in enumerate(self.mc_names):
561            if len(self.e_content[e_name]) == 1:
562                print('No replica distribution for a single replicum (', e_name, ')')
563                continue
564            r_length = []
565            sub_r_mean = 0
566            for r, r_name in enumerate(self.e_content[e_name]):
567                r_length.append(len(self.deltas[r_name]))
568                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
569            e_N = np.sum(r_length)
570            sub_r_mean /= e_N
571            arr = np.zeros(len(self.e_content[e_name]))
572            for r, r_name in enumerate(self.e_content[e_name]):
573                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
574            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
575            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
576            plt.draw()
577
578    def plot_history(self, expand=True):
579        """Plot derived Monte Carlo history for each ensemble
580
581        Parameters
582        ----------
583        expand : bool
584            show expanded history for irregular Monte Carlo chains (default: True).
585        """
586        for e, e_name in enumerate(self.mc_names):
587            plt.figure()
588            r_length = []
589            tmp = []
590            tmp_expanded = []
591            for r, r_name in enumerate(self.e_content[e_name]):
592                tmp.append(self.deltas[r_name] + self.r_values[r_name])
593                if expand:
594                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
595                    r_length.append(len(tmp_expanded[-1]))
596                else:
597                    r_length.append(len(tmp[-1]))
598            e_N = np.sum(r_length)
599            x = np.arange(e_N)
600            y_test = np.concatenate(tmp, axis=0)
601            if expand:
602                y = np.concatenate(tmp_expanded, axis=0)
603            else:
604                y = y_test
605            plt.errorbar(x, y, fmt='.', markersize=3)
606            plt.xlim(-0.5, e_N - 0.5)
607            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
608            plt.draw()
609
610    def plot_piechart(self, save=None):
611        """Plot piechart which shows the fractional contribution of each
612        ensemble to the error and returns a dictionary containing the fractions.
613
614        Parameters
615        ----------
616        save : str
617            saves the figure to a file named 'save' if.
618        """
619        if not hasattr(self, 'e_dvalue'):
620            raise Exception('Run the gamma method first.')
621        if np.isclose(0.0, self._dvalue, atol=1e-15):
622            raise Exception('Error is 0.0')
623        labels = self.e_names
624        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
625        fig1, ax1 = plt.subplots()
626        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
627        ax1.axis('equal')
628        plt.draw()
629        if save:
630            fig1.savefig(save)
631
632        return dict(zip(labels, sizes))
633
634    def dump(self, filename, datatype="json.gz", description="", **kwargs):
635        """Dump the Obs to a file 'name' of chosen format.
636
637        Parameters
638        ----------
639        filename : str
640            name of the file to be saved.
641        datatype : str
642            Format of the exported file. Supported formats include
643            "json.gz" and "pickle"
644        description : str
645            Description for output file, only relevant for json.gz format.
646        path : str
647            specifies a custom path for the file (default '.')
648        """
649        if 'path' in kwargs:
650            file_name = kwargs.get('path') + '/' + filename
651        else:
652            file_name = filename
653
654        if datatype == "json.gz":
655            from .input.json import dump_to_json
656            dump_to_json([self], file_name, description=description)
657        elif datatype == "pickle":
658            with open(file_name + '.p', 'wb') as fb:
659                pickle.dump(self, fb)
660        else:
661            raise Exception("Unknown datatype " + str(datatype))
662
663    def export_jackknife(self):
664        """Export jackknife samples from the Obs
665
666        Returns
667        -------
668        numpy.ndarray
669            Returns a numpy array of length N + 1 where N is the number of samples
670            for the given ensemble and replicum. The zeroth entry of the array contains
671            the mean value of the Obs, entries 1 to N contain the N jackknife samples
672            derived from the Obs. The current implementation only works for observables
673            defined on exactly one ensemble and replicum. The derived jackknife samples
674            should agree with samples from a full jackknife analysis up to O(1/N).
675        """
676
677        if len(self.names) != 1:
678            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
679
680        name = self.names[0]
681        full_data = self.deltas[name] + self.r_values[name]
682        n = full_data.size
683        mean = self.value
684        tmp_jacks = np.zeros(n + 1)
685        tmp_jacks[0] = mean
686        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
687        return tmp_jacks
688
689    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
690        """Export bootstrap samples from the Obs
691
692        Parameters
693        ----------
694        samples : int
695            Number of bootstrap samples to generate.
696        random_numbers : np.ndarray
697            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
698            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
699        save_rng : str
700            Save the random numbers to a file if a path is specified.
701
702        Returns
703        -------
704        numpy.ndarray
705            Returns a numpy array of length N + 1 where N is the number of samples
706            for the given ensemble and replicum. The zeroth entry of the array contains
707            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
708            derived from the Obs. The current implementation only works for observables
709            defined on exactly one ensemble and replicum. The derived bootstrap samples
710            should agree with samples from a full bootstrap analysis up to O(1/N).
711        """
712        if len(self.names) != 1:
713            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
714
715        name = self.names[0]
716        length = self.N
717
718        if random_numbers is None:
719            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
720            rng = np.random.default_rng(seed)
721            random_numbers = rng.integers(0, length, size=(samples, length))
722
723        if save_rng is not None:
724            np.savetxt(save_rng, random_numbers, fmt='%i')
725
726        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
727        ret = np.zeros(samples + 1)
728        ret[0] = self.value
729        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
730        return ret
731
732    def __float__(self):
733        return float(self.value)
734
735    def __repr__(self):
736        return 'Obs[' + str(self) + ']'
737
738    def __str__(self):
739        return _format_uncertainty(self.value, self._dvalue)
740
741    def __format__(self, format_type):
742        if format_type == "":
743            significance = 2
744        else:
745            significance = int(float(format_type.replace("+", "").replace("-", "")))
746        my_str = _format_uncertainty(self.value, self._dvalue,
747                                     significance=significance)
748        for char in ["+", " "]:
749            if format_type.startswith(char):
750                if my_str[0] != "-":
751                    my_str = char + my_str
752        return my_str
753
754    def __hash__(self):
755        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
756        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
757        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
758        hash_tuple += tuple([o.encode() for o in self.names])
759        m = hashlib.md5()
760        [m.update(o) for o in hash_tuple]
761        return int(m.hexdigest(), 16) & 0xFFFFFFFF
762
763    # Overload comparisons
764    def __lt__(self, other):
765        return self.value < other
766
767    def __le__(self, other):
768        return self.value <= other
769
770    def __gt__(self, other):
771        return self.value > other
772
773    def __ge__(self, other):
774        return self.value >= other
775
776    def __eq__(self, other):
777        if other is None:
778            return False
779        return (self - other).is_zero()
780
781    # Overload math operations
782    def __add__(self, y):
783        if isinstance(y, Obs):
784            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
785        else:
786            if isinstance(y, np.ndarray):
787                return np.array([self + o for o in y])
788            elif y.__class__.__name__ in ['Corr', 'CObs']:
789                return NotImplemented
790            else:
791                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
792
793    def __radd__(self, y):
794        return self + y
795
796    def __mul__(self, y):
797        if isinstance(y, Obs):
798            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
799        else:
800            if isinstance(y, np.ndarray):
801                return np.array([self * o for o in y])
802            elif isinstance(y, complex):
803                return CObs(self * y.real, self * y.imag)
804            elif y.__class__.__name__ in ['Corr', 'CObs']:
805                return NotImplemented
806            else:
807                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
808
809    def __rmul__(self, y):
810        return self * y
811
812    def __sub__(self, y):
813        if isinstance(y, Obs):
814            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
815        else:
816            if isinstance(y, np.ndarray):
817                return np.array([self - o for o in y])
818            elif y.__class__.__name__ in ['Corr', 'CObs']:
819                return NotImplemented
820            else:
821                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
822
823    def __rsub__(self, y):
824        return -1 * (self - y)
825
826    def __pos__(self):
827        return self
828
829    def __neg__(self):
830        return -1 * self
831
832    def __truediv__(self, y):
833        if isinstance(y, Obs):
834            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
835        else:
836            if isinstance(y, np.ndarray):
837                return np.array([self / o for o in y])
838            elif y.__class__.__name__ in ['Corr', 'CObs']:
839                return NotImplemented
840            else:
841                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
842
843    def __rtruediv__(self, y):
844        if isinstance(y, Obs):
845            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
846        else:
847            if isinstance(y, np.ndarray):
848                return np.array([o / self for o in y])
849            elif y.__class__.__name__ in ['Corr', 'CObs']:
850                return NotImplemented
851            else:
852                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
853
854    def __pow__(self, y):
855        if isinstance(y, Obs):
856            return derived_observable(lambda x: x[0] ** x[1], [self, y])
857        else:
858            return derived_observable(lambda x: x[0] ** y, [self])
859
860    def __rpow__(self, y):
861        if isinstance(y, Obs):
862            return derived_observable(lambda x: x[0] ** x[1], [y, self])
863        else:
864            return derived_observable(lambda x: y ** x[0], [self])
865
866    def __abs__(self):
867        return derived_observable(lambda x: anp.abs(x[0]), [self])
868
869    # Overload numpy functions
870    def sqrt(self):
871        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
872
873    def log(self):
874        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
875
876    def exp(self):
877        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
878
879    def sin(self):
880        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
881
882    def cos(self):
883        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
884
885    def tan(self):
886        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
887
888    def arcsin(self):
889        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
890
891    def arccos(self):
892        return derived_observable(lambda x: anp.arccos(x[0]), [self])
893
894    def arctan(self):
895        return derived_observable(lambda x: anp.arctan(x[0]), [self])
896
897    def sinh(self):
898        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
899
900    def cosh(self):
901        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
902
903    def tanh(self):
904        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
905
906    def arcsinh(self):
907        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
908
909    def arccosh(self):
910        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
911
912    def arctanh(self):
913        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86            else:
 87                if not isinstance(names[0], str):
 88                    raise TypeError('All names have to be strings.')
 89            if min(len(x) for x in samples) <= 4:
 90                raise ValueError('Samples have to have at least 5 entries.')
 91
 92        self.names = sorted(names)
 93        self.shape = {}
 94        self.r_values = {}
 95        self.deltas = {}
 96        self._covobs = {}
 97
 98        self._value = 0
 99        self.N = 0
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise ValueError("Unsorted idx for idl[%s]" % (name))
109                    if len(dc) == 1:
110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
111                    else:
112                        self.idl[name] = list(idx)
113                else:
114                    raise TypeError('incompatible type for idl[%s].' % (name))
115        else:
116            for name, sample in sorted(zip(names, samples)):
117                self.idl[name] = range(1, len(sample) + 1)
118
119        if kwargs.get("means") is not None:
120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
121                self.shape[name] = len(self.idl[name])
122                self.N += self.shape[name]
123                self.r_values[name] = mean
124                self.deltas[name] = sample
125        else:
126            for name, sample in sorted(zip(names, samples)):
127                self.shape[name] = len(self.idl[name])
128                self.N += self.shape[name]
129                if len(sample) != self.shape[name]:
130                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
131                self.r_values[name] = np.mean(sample)
132                self.deltas[name] = sample - self.r_values[name]
133                self._value += self.shape[name] * self.r_values[name]
134            self._value /= self.N
135
136        self._dvalue = 0.0
137        self.ddvalue = 0.0
138        self.reweighted = False
139
140        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
names
shape
r_values
deltas
N
idl
ddvalue
reweighted
tag
value
dvalue
e_names
cov_names
mc_names
e_content
covobs
def gamma_method(self, **kwargs):
175    def gamma_method(self, **kwargs):
176        """Estimate the error and related properties of the Obs.
177
178        Parameters
179        ----------
180        S : float
181            specifies a custom value for the parameter S (default 2.0).
182            If set to 0 it is assumed that the data exhibits no
183            autocorrelation. In this case the error estimates coincides
184            with the sample standard error.
185        tau_exp : float
186            positive value triggers the critical slowing down analysis
187            (default 0.0).
188        N_sigma : float
189            number of standard deviations from zero until the tail is
190            attached to the autocorrelation function (default 1).
191        fft : bool
192            determines whether the fft algorithm is used for the computation
193            of the autocorrelation function (default True)
194        """
195
196        e_content = self.e_content
197        self.e_dvalue = {}
198        self.e_ddvalue = {}
199        self.e_tauint = {}
200        self.e_dtauint = {}
201        self.e_windowsize = {}
202        self.e_n_tauint = {}
203        self.e_n_dtauint = {}
204        e_gamma = {}
205        self.e_rho = {}
206        self.e_drho = {}
207        self._dvalue = 0
208        self.ddvalue = 0
209
210        self.S = {}
211        self.tau_exp = {}
212        self.N_sigma = {}
213
214        if kwargs.get('fft') is False:
215            fft = False
216        else:
217            fft = True
218
219        def _parse_kwarg(kwarg_name):
220            if kwarg_name in kwargs:
221                tmp = kwargs.get(kwarg_name)
222                if isinstance(tmp, (int, float)):
223                    if tmp < 0:
224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
225                    for e, e_name in enumerate(self.e_names):
226                        getattr(self, kwarg_name)[e_name] = tmp
227                else:
228                    raise TypeError(kwarg_name + ' is not in proper format.')
229            else:
230                for e, e_name in enumerate(self.e_names):
231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
233                    else:
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
235
236        _parse_kwarg('S')
237        _parse_kwarg('tau_exp')
238        _parse_kwarg('N_sigma')
239
240        for e, e_name in enumerate(self.mc_names):
241            gapsize = _determine_gap(self, e_content, e_name)
242
243            r_length = []
244            for r_name in e_content[e_name]:
245                if isinstance(self.idl[r_name], range):
246                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
247                else:
248                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
249
250            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
251            w_max = max(r_length) // 2
252            e_gamma[e_name] = np.zeros(w_max)
253            self.e_rho[e_name] = np.zeros(w_max)
254            self.e_drho[e_name] = np.zeros(w_max)
255
256            for r_name in e_content[e_name]:
257                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
258
259            gamma_div = np.zeros(w_max)
260            for r_name in e_content[e_name]:
261                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262            gamma_div[gamma_div < 1] = 1.0
263            e_gamma[e_name] /= gamma_div[:w_max]
264
265            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
266                self.e_tauint[e_name] = 0.5
267                self.e_dtauint[e_name] = 0.0
268                self.e_dvalue[e_name] = 0.0
269                self.e_ddvalue[e_name] = 0.0
270                self.e_windowsize[e_name] = 0
271                continue
272
273            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
274            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
275            # Make sure no entry of tauint is smaller than 0.5
276            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
277            # hep-lat/0306017 eq. (42)
278            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
279            self.e_n_dtauint[e_name][0] = 0.0
280
281            def _compute_drho(i):
282                tmp = (self.e_rho[e_name][i + 1:w_max]
283                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
284                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
285                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
286                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
287
288            if self.tau_exp[e_name] > 0:
289                _compute_drho(1)
290                texp = self.tau_exp[e_name]
291                # Critical slowing down analysis
292                if w_max // 2 <= 1:
293                    raise Exception("Need at least 8 samples for tau_exp error analysis")
294                for n in range(1, w_max // 2):
295                    _compute_drho(n + 1)
296                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
297                        # Bias correction hep-lat/0306017 eq. (49) included
298                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
299                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
300                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
301                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
302                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
303                        self.e_windowsize[e_name] = n
304                        break
305            else:
306                if self.S[e_name] == 0.0:
307                    self.e_tauint[e_name] = 0.5
308                    self.e_dtauint[e_name] = 0.0
309                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
310                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
311                    self.e_windowsize[e_name] = 0
312                else:
313                    # Standard automatic windowing procedure
314                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
315                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
316                    for n in range(1, w_max):
317                        if g_w[n - 1] < 0 or n >= w_max - 1:
318                            _compute_drho(n)
319                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
320                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
321                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
322                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
323                            self.e_windowsize[e_name] = n
324                            break
325
326            self._dvalue += self.e_dvalue[e_name] ** 2
327            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
328
329        for e_name in self.cov_names:
330            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
331            self.e_ddvalue[e_name] = 0
332            self._dvalue += self.e_dvalue[e_name]**2
333
334        self._dvalue = np.sqrt(self._dvalue)
335        if self._dvalue == 0.0:
336            self.ddvalue = 0.0
337        else:
338            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
339        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
175    def gamma_method(self, **kwargs):
176        """Estimate the error and related properties of the Obs.
177
178        Parameters
179        ----------
180        S : float
181            specifies a custom value for the parameter S (default 2.0).
182            If set to 0 it is assumed that the data exhibits no
183            autocorrelation. In this case the error estimates coincides
184            with the sample standard error.
185        tau_exp : float
186            positive value triggers the critical slowing down analysis
187            (default 0.0).
188        N_sigma : float
189            number of standard deviations from zero until the tail is
190            attached to the autocorrelation function (default 1).
191        fft : bool
192            determines whether the fft algorithm is used for the computation
193            of the autocorrelation function (default True)
194        """
195
196        e_content = self.e_content
197        self.e_dvalue = {}
198        self.e_ddvalue = {}
199        self.e_tauint = {}
200        self.e_dtauint = {}
201        self.e_windowsize = {}
202        self.e_n_tauint = {}
203        self.e_n_dtauint = {}
204        e_gamma = {}
205        self.e_rho = {}
206        self.e_drho = {}
207        self._dvalue = 0
208        self.ddvalue = 0
209
210        self.S = {}
211        self.tau_exp = {}
212        self.N_sigma = {}
213
214        if kwargs.get('fft') is False:
215            fft = False
216        else:
217            fft = True
218
219        def _parse_kwarg(kwarg_name):
220            if kwarg_name in kwargs:
221                tmp = kwargs.get(kwarg_name)
222                if isinstance(tmp, (int, float)):
223                    if tmp < 0:
224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
225                    for e, e_name in enumerate(self.e_names):
226                        getattr(self, kwarg_name)[e_name] = tmp
227                else:
228                    raise TypeError(kwarg_name + ' is not in proper format.')
229            else:
230                for e, e_name in enumerate(self.e_names):
231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
233                    else:
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
235
236        _parse_kwarg('S')
237        _parse_kwarg('tau_exp')
238        _parse_kwarg('N_sigma')
239
240        for e, e_name in enumerate(self.mc_names):
241            gapsize = _determine_gap(self, e_content, e_name)
242
243            r_length = []
244            for r_name in e_content[e_name]:
245                if isinstance(self.idl[r_name], range):
246                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
247                else:
248                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
249
250            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
251            w_max = max(r_length) // 2
252            e_gamma[e_name] = np.zeros(w_max)
253            self.e_rho[e_name] = np.zeros(w_max)
254            self.e_drho[e_name] = np.zeros(w_max)
255
256            for r_name in e_content[e_name]:
257                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
258
259            gamma_div = np.zeros(w_max)
260            for r_name in e_content[e_name]:
261                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262            gamma_div[gamma_div < 1] = 1.0
263            e_gamma[e_name] /= gamma_div[:w_max]
264
265            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
266                self.e_tauint[e_name] = 0.5
267                self.e_dtauint[e_name] = 0.0
268                self.e_dvalue[e_name] = 0.0
269                self.e_ddvalue[e_name] = 0.0
270                self.e_windowsize[e_name] = 0
271                continue
272
273            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
274            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
275            # Make sure no entry of tauint is smaller than 0.5
276            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
277            # hep-lat/0306017 eq. (42)
278            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
279            self.e_n_dtauint[e_name][0] = 0.0
280
281            def _compute_drho(i):
282                tmp = (self.e_rho[e_name][i + 1:w_max]
283                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
284                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
285                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
286                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
287
288            if self.tau_exp[e_name] > 0:
289                _compute_drho(1)
290                texp = self.tau_exp[e_name]
291                # Critical slowing down analysis
292                if w_max // 2 <= 1:
293                    raise Exception("Need at least 8 samples for tau_exp error analysis")
294                for n in range(1, w_max // 2):
295                    _compute_drho(n + 1)
296                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
297                        # Bias correction hep-lat/0306017 eq. (49) included
298                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
299                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
300                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
301                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
302                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
303                        self.e_windowsize[e_name] = n
304                        break
305            else:
306                if self.S[e_name] == 0.0:
307                    self.e_tauint[e_name] = 0.5
308                    self.e_dtauint[e_name] = 0.0
309                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
310                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
311                    self.e_windowsize[e_name] = 0
312                else:
313                    # Standard automatic windowing procedure
314                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
315                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
316                    for n in range(1, w_max):
317                        if g_w[n - 1] < 0 or n >= w_max - 1:
318                            _compute_drho(n)
319                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
320                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
321                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
322                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
323                            self.e_windowsize[e_name] = n
324                            break
325
326            self._dvalue += self.e_dvalue[e_name] ** 2
327            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
328
329        for e_name in self.cov_names:
330            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
331            self.e_ddvalue[e_name] = 0
332            self._dvalue += self.e_dvalue[e_name]**2
333
334        self._dvalue = np.sqrt(self._dvalue)
335        if self._dvalue == 0.0:
336            self.ddvalue = 0.0
337        else:
338            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
339        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
379    def details(self, ens_content=True):
380        """Output detailed properties of the Obs.
381
382        Parameters
383        ----------
384        ens_content : bool
385            print details about the ensembles and replica if true.
386        """
387        if self.tag is not None:
388            print("Description:", self.tag)
389        if not hasattr(self, 'e_dvalue'):
390            print('Result\t %3.8e' % (self.value))
391        else:
392            if self.value == 0.0:
393                percentage = np.nan
394            else:
395                percentage = np.abs(self._dvalue / self.value) * 100
396            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
397            if len(self.e_names) > 1:
398                print(' Ensemble errors:')
399            e_content = self.e_content
400            for e_name in self.mc_names:
401                gap = _determine_gap(self, e_content, e_name)
402
403                if len(self.e_names) > 1:
404                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
405                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
406                tau_string += f" in units of {gap} config"
407                if gap > 1:
408                    tau_string += "s"
409                if self.tau_exp[e_name] > 0:
410                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
411                else:
412                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
413                print(tau_string)
414            for e_name in self.cov_names:
415                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
416        if ens_content is True:
417            if len(self.e_names) == 1:
418                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
419            else:
420                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
421            my_string_list = []
422            for key, value in sorted(self.e_content.items()):
423                if key not in self.covobs:
424                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
425                    if len(value) == 1:
426                        my_string += f': {self.shape[value[0]]} configurations'
427                        if isinstance(self.idl[value[0]], range):
428                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
429                        else:
430                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
431                    else:
432                        sublist = []
433                        for v in value:
434                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
435                            my_substring += f': {self.shape[v]} configurations'
436                            if isinstance(self.idl[v], range):
437                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
438                            else:
439                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
440                            sublist.append(my_substring)
441
442                        my_string += '\n' + '\n'.join(sublist)
443                else:
444                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
445                my_string_list.append(my_string)
446            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
448    def reweight(self, weight):
449        """Reweight the obs with given rewighting factors.
450
451        Parameters
452        ----------
453        weight : Obs
454            Reweighting factor. An Observable that has to be defined on a superset of the
455            configurations in obs[i].idl for all i.
456        all_configs : bool
457            if True, the reweighted observables are normalized by the average of
458            the reweighting factor on all configurations in weight.idl and not
459            on the configurations in obs[i].idl. Default False.
460        """
461        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
463    def is_zero_within_error(self, sigma=1):
464        """Checks whether the observable is zero within 'sigma' standard errors.
465
466        Parameters
467        ----------
468        sigma : int
469            Number of standard errors used for the check.
470
471        Works only properly when the gamma method was run.
472        """
473        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
475    def is_zero(self, atol=1e-10):
476        """Checks whether the observable is zero within a given tolerance.
477
478        Parameters
479        ----------
480        atol : float
481            Absolute tolerance (for details see numpy documentation).
482        """
483        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
485    def plot_tauint(self, save=None):
486        """Plot integrated autocorrelation time for each ensemble.
487
488        Parameters
489        ----------
490        save : str
491            saves the figure to a file named 'save' if.
492        """
493        if not hasattr(self, 'e_dvalue'):
494            raise Exception('Run the gamma method first.')
495
496        for e, e_name in enumerate(self.mc_names):
497            fig = plt.figure()
498            plt.xlabel(r'$W$')
499            plt.ylabel(r'$\tau_\mathrm{int}$')
500            length = int(len(self.e_n_tauint[e_name]))
501            if self.tau_exp[e_name] > 0:
502                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
503                x_help = np.arange(2 * self.tau_exp[e_name])
504                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
505                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
506                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
507                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
508                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
509                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
510                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
511            else:
512                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
513                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
514
515            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
516            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
517            plt.legend()
518            plt.xlim(-0.5, xmax)
519            ylim = plt.ylim()
520            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
521            plt.draw()
522            if save:
523                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
525    def plot_rho(self, save=None):
526        """Plot normalized autocorrelation function time for each ensemble.
527
528        Parameters
529        ----------
530        save : str
531            saves the figure to a file named 'save' if.
532        """
533        if not hasattr(self, 'e_dvalue'):
534            raise Exception('Run the gamma method first.')
535        for e, e_name in enumerate(self.mc_names):
536            fig = plt.figure()
537            plt.xlabel('W')
538            plt.ylabel('rho')
539            length = int(len(self.e_drho[e_name]))
540            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
541            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
542            if self.tau_exp[e_name] > 0:
543                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
544                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
545                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
546                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
547            else:
548                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
549                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
550            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
551            plt.xlim(-0.5, xmax)
552            plt.draw()
553            if save:
554                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
556    def plot_rep_dist(self):
557        """Plot replica distribution for each ensemble with more than one replicum."""
558        if not hasattr(self, 'e_dvalue'):
559            raise Exception('Run the gamma method first.')
560        for e, e_name in enumerate(self.mc_names):
561            if len(self.e_content[e_name]) == 1:
562                print('No replica distribution for a single replicum (', e_name, ')')
563                continue
564            r_length = []
565            sub_r_mean = 0
566            for r, r_name in enumerate(self.e_content[e_name]):
567                r_length.append(len(self.deltas[r_name]))
568                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
569            e_N = np.sum(r_length)
570            sub_r_mean /= e_N
571            arr = np.zeros(len(self.e_content[e_name]))
572            for r, r_name in enumerate(self.e_content[e_name]):
573                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
574            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
575            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
576            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
578    def plot_history(self, expand=True):
579        """Plot derived Monte Carlo history for each ensemble
580
581        Parameters
582        ----------
583        expand : bool
584            show expanded history for irregular Monte Carlo chains (default: True).
585        """
586        for e, e_name in enumerate(self.mc_names):
587            plt.figure()
588            r_length = []
589            tmp = []
590            tmp_expanded = []
591            for r, r_name in enumerate(self.e_content[e_name]):
592                tmp.append(self.deltas[r_name] + self.r_values[r_name])
593                if expand:
594                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
595                    r_length.append(len(tmp_expanded[-1]))
596                else:
597                    r_length.append(len(tmp[-1]))
598            e_N = np.sum(r_length)
599            x = np.arange(e_N)
600            y_test = np.concatenate(tmp, axis=0)
601            if expand:
602                y = np.concatenate(tmp_expanded, axis=0)
603            else:
604                y = y_test
605            plt.errorbar(x, y, fmt='.', markersize=3)
606            plt.xlim(-0.5, e_N - 0.5)
607            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
608            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
610    def plot_piechart(self, save=None):
611        """Plot piechart which shows the fractional contribution of each
612        ensemble to the error and returns a dictionary containing the fractions.
613
614        Parameters
615        ----------
616        save : str
617            saves the figure to a file named 'save' if.
618        """
619        if not hasattr(self, 'e_dvalue'):
620            raise Exception('Run the gamma method first.')
621        if np.isclose(0.0, self._dvalue, atol=1e-15):
622            raise Exception('Error is 0.0')
623        labels = self.e_names
624        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
625        fig1, ax1 = plt.subplots()
626        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
627        ax1.axis('equal')
628        plt.draw()
629        if save:
630            fig1.savefig(save)
631
632        return dict(zip(labels, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
634    def dump(self, filename, datatype="json.gz", description="", **kwargs):
635        """Dump the Obs to a file 'name' of chosen format.
636
637        Parameters
638        ----------
639        filename : str
640            name of the file to be saved.
641        datatype : str
642            Format of the exported file. Supported formats include
643            "json.gz" and "pickle"
644        description : str
645            Description for output file, only relevant for json.gz format.
646        path : str
647            specifies a custom path for the file (default '.')
648        """
649        if 'path' in kwargs:
650            file_name = kwargs.get('path') + '/' + filename
651        else:
652            file_name = filename
653
654        if datatype == "json.gz":
655            from .input.json import dump_to_json
656            dump_to_json([self], file_name, description=description)
657        elif datatype == "pickle":
658            with open(file_name + '.p', 'wb') as fb:
659                pickle.dump(self, fb)
660        else:
661            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
663    def export_jackknife(self):
664        """Export jackknife samples from the Obs
665
666        Returns
667        -------
668        numpy.ndarray
669            Returns a numpy array of length N + 1 where N is the number of samples
670            for the given ensemble and replicum. The zeroth entry of the array contains
671            the mean value of the Obs, entries 1 to N contain the N jackknife samples
672            derived from the Obs. The current implementation only works for observables
673            defined on exactly one ensemble and replicum. The derived jackknife samples
674            should agree with samples from a full jackknife analysis up to O(1/N).
675        """
676
677        if len(self.names) != 1:
678            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
679
680        name = self.names[0]
681        full_data = self.deltas[name] + self.r_values[name]
682        n = full_data.size
683        mean = self.value
684        tmp_jacks = np.zeros(n + 1)
685        tmp_jacks[0] = mean
686        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
687        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
689    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
690        """Export bootstrap samples from the Obs
691
692        Parameters
693        ----------
694        samples : int
695            Number of bootstrap samples to generate.
696        random_numbers : np.ndarray
697            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
698            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
699        save_rng : str
700            Save the random numbers to a file if a path is specified.
701
702        Returns
703        -------
704        numpy.ndarray
705            Returns a numpy array of length N + 1 where N is the number of samples
706            for the given ensemble and replicum. The zeroth entry of the array contains
707            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
708            derived from the Obs. The current implementation only works for observables
709            defined on exactly one ensemble and replicum. The derived bootstrap samples
710            should agree with samples from a full bootstrap analysis up to O(1/N).
711        """
712        if len(self.names) != 1:
713            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
714
715        name = self.names[0]
716        length = self.N
717
718        if random_numbers is None:
719            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
720            rng = np.random.default_rng(seed)
721            random_numbers = rng.integers(0, length, size=(samples, length))
722
723        if save_rng is not None:
724            np.savetxt(save_rng, random_numbers, fmt='%i')
725
726        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
727        ret = np.zeros(samples + 1)
728        ret[0] = self.value
729        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
730        return ret

Export bootstrap samples from the Obs

Parameters
  • samples (int): Number of bootstrap samples to generate.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
  • save_rng (str): Save the random numbers to a file if a path is specified.
Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
def sqrt(self):
870    def sqrt(self):
871        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
873    def log(self):
874        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
876    def exp(self):
877        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
879    def sin(self):
880        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
882    def cos(self):
883        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
885    def tan(self):
886        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
888    def arcsin(self):
889        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
891    def arccos(self):
892        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
894    def arctan(self):
895        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
897    def sinh(self):
898        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
900    def cosh(self):
901        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
903    def tanh(self):
904        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
906    def arcsinh(self):
907        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
909    def arccosh(self):
910        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
912    def arctanh(self):
913        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
 916class CObs:
 917    """Class for a complex valued observable."""
 918    __slots__ = ['_real', '_imag', 'tag']
 919
 920    def __init__(self, real, imag=0.0):
 921        self._real = real
 922        self._imag = imag
 923        self.tag = None
 924
 925    @property
 926    def real(self):
 927        return self._real
 928
 929    @property
 930    def imag(self):
 931        return self._imag
 932
 933    def gamma_method(self, **kwargs):
 934        """Executes the gamma_method for the real and the imaginary part."""
 935        if isinstance(self.real, Obs):
 936            self.real.gamma_method(**kwargs)
 937        if isinstance(self.imag, Obs):
 938            self.imag.gamma_method(**kwargs)
 939
 940    def is_zero(self):
 941        """Checks whether both real and imaginary part are zero within machine precision."""
 942        return self.real == 0.0 and self.imag == 0.0
 943
 944    def conjugate(self):
 945        return CObs(self.real, -self.imag)
 946
 947    def __add__(self, other):
 948        if isinstance(other, np.ndarray):
 949            return other + self
 950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 951            return CObs(self.real + other.real,
 952                        self.imag + other.imag)
 953        else:
 954            return CObs(self.real + other, self.imag)
 955
 956    def __radd__(self, y):
 957        return self + y
 958
 959    def __sub__(self, other):
 960        if isinstance(other, np.ndarray):
 961            return -1 * (other - self)
 962        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 963            return CObs(self.real - other.real, self.imag - other.imag)
 964        else:
 965            return CObs(self.real - other, self.imag)
 966
 967    def __rsub__(self, other):
 968        return -1 * (self - other)
 969
 970    def __mul__(self, other):
 971        if isinstance(other, np.ndarray):
 972            return other * self
 973        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 974            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 975                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 976                                               [self.real, other.real, self.imag, other.imag],
 977                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 978                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 981            elif getattr(other, 'imag', 0) != 0:
 982                return CObs(self.real * other.real - self.imag * other.imag,
 983                            self.imag * other.real + self.real * other.imag)
 984            else:
 985                return CObs(self.real * other.real, self.imag * other.real)
 986        else:
 987            return CObs(self.real * other, self.imag * other)
 988
 989    def __rmul__(self, other):
 990        return self * other
 991
 992    def __truediv__(self, other):
 993        if isinstance(other, np.ndarray):
 994            return 1 / (other / self)
 995        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 996            r = other.real ** 2 + other.imag ** 2
 997            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 998        else:
 999            return CObs(self.real / other, self.imag / other)
1000
1001    def __rtruediv__(self, other):
1002        r = self.real ** 2 + self.imag ** 2
1003        if hasattr(other, 'real') and hasattr(other, 'imag'):
1004            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1005        else:
1006            return CObs(self.real * other / r, -self.imag * other / r)
1007
1008    def __abs__(self):
1009        return np.sqrt(self.real**2 + self.imag**2)
1010
1011    def __pos__(self):
1012        return self
1013
1014    def __neg__(self):
1015        return -1 * self
1016
1017    def __eq__(self, other):
1018        return self.real == other.real and self.imag == other.imag
1019
1020    def __str__(self):
1021        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1022
1023    def __repr__(self):
1024        return 'CObs[' + str(self) + ']'
1025
1026    def __format__(self, format_type):
1027        if format_type == "":
1028            significance = 2
1029            format_type = "2"
1030        else:
1031            significance = int(float(format_type.replace("+", "").replace("-", "")))
1032        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0)
920    def __init__(self, real, imag=0.0):
921        self._real = real
922        self._imag = imag
923        self.tag = None
tag
real
imag
def gamma_method(self, **kwargs):
933    def gamma_method(self, **kwargs):
934        """Executes the gamma_method for the real and the imaginary part."""
935        if isinstance(self.real, Obs):
936            self.real.gamma_method(**kwargs)
937        if isinstance(self.imag, Obs):
938            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
940    def is_zero(self):
941        """Checks whether both real and imaginary part are zero within machine precision."""
942        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
944    def conjugate(self):
945        return CObs(self.real, -self.imag)
def gamma_method(x, **kwargs):
1035def gamma_method(x, **kwargs):
1036    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1037
1038    See docstring of pe.Obs.gamma_method for details.
1039    """
1040    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs):
1035def gamma_method(x, **kwargs):
1036    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1037
1038    See docstring of pe.Obs.gamma_method for details.
1039    """
1040    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def derived_observable(func, data, array_mode=False, **kwargs):
1165def derived_observable(func, data, array_mode=False, **kwargs):
1166    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1167
1168    Parameters
1169    ----------
1170    func : object
1171        arbitrary function of the form func(data, **kwargs). For the
1172        automatic differentiation to work, all numpy functions have to have
1173        the autograd wrapper (use 'import autograd.numpy as anp').
1174    data : list
1175        list of Obs, e.g. [obs1, obs2, obs3].
1176    num_grad : bool
1177        if True, numerical derivatives are used instead of autograd
1178        (default False). To control the numerical differentiation the
1179        kwargs of numdifftools.step_generators.MaxStepGenerator
1180        can be used.
1181    man_grad : list
1182        manually supply a list or an array which contains the jacobian
1183        of func. Use cautiously, supplying the wrong derivative will
1184        not be intercepted.
1185
1186    Notes
1187    -----
1188    For simple mathematical operations it can be practical to use anonymous
1189    functions. For the ratio of two observables one can e.g. use
1190
1191    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1192    """
1193
1194    data = np.asarray(data)
1195    raveled_data = data.ravel()
1196
1197    # Workaround for matrix operations containing non Obs data
1198    if not all(isinstance(x, Obs) for x in raveled_data):
1199        for i in range(len(raveled_data)):
1200            if isinstance(raveled_data[i], (int, float)):
1201                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1202
1203    allcov = {}
1204    for o in raveled_data:
1205        for name in o.cov_names:
1206            if name in allcov:
1207                if not np.allclose(allcov[name], o.covobs[name].cov):
1208                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1209            else:
1210                allcov[name] = o.covobs[name].cov
1211
1212    n_obs = len(raveled_data)
1213    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1214    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1215    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1216
1217    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1218
1219    if data.ndim == 1:
1220        values = np.array([o.value for o in data])
1221    else:
1222        values = np.vectorize(lambda x: x.value)(data)
1223
1224    new_values = func(values, **kwargs)
1225
1226    multi = int(isinstance(new_values, np.ndarray))
1227
1228    new_r_values = {}
1229    new_idl_d = {}
1230    for name in new_sample_names:
1231        idl = []
1232        tmp_values = np.zeros(n_obs)
1233        for i, item in enumerate(raveled_data):
1234            tmp_values[i] = item.r_values.get(name, item.value)
1235            tmp_idl = item.idl.get(name)
1236            if tmp_idl is not None:
1237                idl.append(tmp_idl)
1238        if multi > 0:
1239            tmp_values = np.array(tmp_values).reshape(data.shape)
1240        new_r_values[name] = func(tmp_values, **kwargs)
1241        new_idl_d[name] = _merge_idx(idl)
1242
1243    if 'man_grad' in kwargs:
1244        deriv = np.asarray(kwargs.get('man_grad'))
1245        if new_values.shape + data.shape != deriv.shape:
1246            raise Exception('Manual derivative does not have correct shape.')
1247    elif kwargs.get('num_grad') is True:
1248        if multi > 0:
1249            raise Exception('Multi mode currently not supported for numerical derivative')
1250        options = {
1251            'base_step': 0.1,
1252            'step_ratio': 2.5}
1253        for key in options.keys():
1254            kwarg = kwargs.get(key)
1255            if kwarg is not None:
1256                options[key] = kwarg
1257        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1258        if tmp_df.size == 1:
1259            deriv = np.array([tmp_df.real])
1260        else:
1261            deriv = tmp_df.real
1262    else:
1263        deriv = jacobian(func)(values, **kwargs)
1264
1265    final_result = np.zeros(new_values.shape, dtype=object)
1266
1267    if array_mode is True:
1268
1269        class _Zero_grad():
1270            def __init__(self, N):
1271                self.grad = np.zeros((N, 1))
1272
1273        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1274        d_extracted = {}
1275        g_extracted = {}
1276        for name in new_sample_names:
1277            d_extracted[name] = []
1278            ens_length = len(new_idl_d[name])
1279            for i_dat, dat in enumerate(data):
1280                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1281        for name in new_cov_names:
1282            g_extracted[name] = []
1283            zero_grad = _Zero_grad(new_covobs_lengths[name])
1284            for i_dat, dat in enumerate(data):
1285                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1286
1287    for i_val, new_val in np.ndenumerate(new_values):
1288        new_deltas = {}
1289        new_grad = {}
1290        if array_mode is True:
1291            for name in new_sample_names:
1292                ens_length = d_extracted[name][0].shape[-1]
1293                new_deltas[name] = np.zeros(ens_length)
1294                for i_dat, dat in enumerate(d_extracted[name]):
1295                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1296            for name in new_cov_names:
1297                new_grad[name] = 0
1298                for i_dat, dat in enumerate(g_extracted[name]):
1299                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1300        else:
1301            for j_obs, obs in np.ndenumerate(data):
1302                for name in obs.names:
1303                    if name in obs.cov_names:
1304                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1305                    else:
1306                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1307
1308        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1309
1310        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1311            raise Exception('The same name has been used for deltas and covobs!')
1312        new_samples = []
1313        new_means = []
1314        new_idl = []
1315        new_names_obs = []
1316        for name in new_names:
1317            if name not in new_covobs:
1318                new_samples.append(new_deltas[name])
1319                new_idl.append(new_idl_d[name])
1320                new_means.append(new_r_values[name][i_val])
1321                new_names_obs.append(name)
1322        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1323        for name in new_covobs:
1324            final_result[i_val].names.append(name)
1325        final_result[i_val]._covobs = new_covobs
1326        final_result[i_val]._value = new_val
1327        final_result[i_val].reweighted = reweighted
1328
1329    if multi == 0:
1330        final_result = final_result.item()
1331
1332    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1364def reweight(weight, obs, **kwargs):
1365    """Reweight a list of observables.
1366
1367    Parameters
1368    ----------
1369    weight : Obs
1370        Reweighting factor. An Observable that has to be defined on a superset of the
1371        configurations in obs[i].idl for all i.
1372    obs : list
1373        list of Obs, e.g. [obs1, obs2, obs3].
1374    all_configs : bool
1375        if True, the reweighted observables are normalized by the average of
1376        the reweighting factor on all configurations in weight.idl and not
1377        on the configurations in obs[i].idl. Default False.
1378    """
1379    result = []
1380    for i in range(len(obs)):
1381        if len(obs[i].cov_names):
1382            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1383        if not set(obs[i].names).issubset(weight.names):
1384            raise Exception('Error: Ensembles do not fit')
1385        for name in obs[i].names:
1386            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1387                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1388        new_samples = []
1389        w_deltas = {}
1390        for name in sorted(obs[i].names):
1391            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1392            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1393        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1394
1395        if kwargs.get('all_configs'):
1396            new_weight = weight
1397        else:
1398            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1399
1400        result.append(tmp_obs / new_weight)
1401        result[-1].reweighted = True
1402
1403    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1406def correlate(obs_a, obs_b):
1407    """Correlate two observables.
1408
1409    Parameters
1410    ----------
1411    obs_a : Obs
1412        First observable
1413    obs_b : Obs
1414        Second observable
1415
1416    Notes
1417    -----
1418    Keep in mind to only correlate primary observables which have not been reweighted
1419    yet. The reweighting has to be applied after correlating the observables.
1420    Currently only works if ensembles are identical (this is not strictly necessary).
1421    """
1422
1423    if sorted(obs_a.names) != sorted(obs_b.names):
1424        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1425    if len(obs_a.cov_names) or len(obs_b.cov_names):
1426        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1427    for name in obs_a.names:
1428        if obs_a.shape[name] != obs_b.shape[name]:
1429            raise Exception('Shapes of ensemble', name, 'do not fit')
1430        if obs_a.idl[name] != obs_b.idl[name]:
1431            raise Exception('idl of ensemble', name, 'do not fit')
1432
1433    if obs_a.reweighted is True:
1434        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1435    if obs_b.reweighted is True:
1436        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1437
1438    new_samples = []
1439    new_idl = []
1440    for name in sorted(obs_a.names):
1441        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1442        new_idl.append(obs_a.idl[name])
1443
1444    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1445    o.reweighted = obs_a.reweighted or obs_b.reweighted
1446    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1449def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1450    r'''Calculates the error covariance matrix of a set of observables.
1451
1452    WARNING: This function should be used with care, especially for observables with support on multiple
1453             ensembles with differing autocorrelations. See the notes below for details.
1454
1455    The gamma method has to be applied first to all observables.
1456
1457    Parameters
1458    ----------
1459    obs : list or numpy.ndarray
1460        List or one dimensional array of Obs
1461    visualize : bool
1462        If True plots the corresponding normalized correlation matrix (default False).
1463    correlation : bool
1464        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1465    smooth : None or int
1466        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1467        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1468        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1469        small ones.
1470
1471    Notes
1472    -----
1473    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1474    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1475    in the absence of autocorrelation.
1476    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1477    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1478    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1479    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1480    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1481    '''
1482
1483    length = len(obs)
1484
1485    max_samples = np.max([o.N for o in obs])
1486    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1487        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1488
1489    cov = np.zeros((length, length))
1490    for i in range(length):
1491        for j in range(i, length):
1492            cov[i, j] = _covariance_element(obs[i], obs[j])
1493    cov = cov + cov.T - np.diag(np.diag(cov))
1494
1495    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1496
1497    if isinstance(smooth, int):
1498        corr = _smooth_eigenvalues(corr, smooth)
1499
1500    if visualize:
1501        plt.matshow(corr, vmin=-1, vmax=1)
1502        plt.set_cmap('RdBu')
1503        plt.colorbar()
1504        plt.draw()
1505
1506    if correlation is True:
1507        return corr
1508
1509    errors = [o.dvalue for o in obs]
1510    cov = np.diag(errors) @ corr @ np.diag(errors)
1511
1512    eigenvalues = np.linalg.eigh(cov)[0]
1513    if not np.all(eigenvalues >= 0):
1514        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1515
1516    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1596def import_jackknife(jacks, name, idl=None):
1597    """Imports jackknife samples and returns an Obs
1598
1599    Parameters
1600    ----------
1601    jacks : numpy.ndarray
1602        numpy array containing the mean value as zeroth entry and
1603        the N jackknife samples as first to Nth entry.
1604    name : str
1605        name of the ensemble the samples are defined on.
1606    """
1607    length = len(jacks) - 1
1608    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1609    samples = jacks[1:] @ prj
1610    mean = np.mean(samples)
1611    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1612    new_obs._value = jacks[0]
1613    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def import_bootstrap(boots, name, random_numbers):
1616def import_bootstrap(boots, name, random_numbers):
1617    """Imports bootstrap samples and returns an Obs
1618
1619    Parameters
1620    ----------
1621    boots : numpy.ndarray
1622        numpy array containing the mean value as zeroth entry and
1623        the N bootstrap samples as first to Nth entry.
1624    name : str
1625        name of the ensemble the samples are defined on.
1626    random_numbers : np.ndarray
1627        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1628        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1629        chain to be reconstructed.
1630    """
1631    samples, length = random_numbers.shape
1632    if samples != len(boots) - 1:
1633        raise ValueError("Random numbers do not have the correct shape.")
1634
1635    if samples < length:
1636        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1637
1638    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1639
1640    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1641    ret = Obs([samples], [name])
1642    ret._value = boots[0]
1643    return ret

Imports bootstrap samples and returns an Obs

Parameters
  • boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
def merge_obs(list_of_obs):
1646def merge_obs(list_of_obs):
1647    """Combine all observables in list_of_obs into one new observable
1648
1649    Parameters
1650    ----------
1651    list_of_obs : list
1652        list of the Obs object to be combined
1653
1654    Notes
1655    -----
1656    It is not possible to combine obs which are based on the same replicum
1657    """
1658    replist = [item for obs in list_of_obs for item in obs.names]
1659    if (len(replist) == len(set(replist))) is False:
1660        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1661    if any([len(o.cov_names) for o in list_of_obs]):
1662        raise Exception('Not possible to merge data that contains covobs!')
1663    new_dict = {}
1664    idl_dict = {}
1665    for o in list_of_obs:
1666        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1667                        for key in set(o.deltas) | set(o.r_values)})
1668        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1669
1670    names = sorted(new_dict.keys())
1671    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1672    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1673    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1676def cov_Obs(means, cov, name, grad=None):
1677    """Create an Obs based on mean(s) and a covariance matrix
1678
1679    Parameters
1680    ----------
1681    mean : list of floats or float
1682        N mean value(s) of the new Obs
1683    cov : list or array
1684        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1685    name : str
1686        identifier for the covariance matrix
1687    grad : list or array
1688        Gradient of the Covobs wrt. the means belonging to cov.
1689    """
1690
1691    def covobs_to_obs(co):
1692        """Make an Obs out of a Covobs
1693
1694        Parameters
1695        ----------
1696        co : Covobs
1697            Covobs to be embedded into the Obs
1698        """
1699        o = Obs([], [], means=[])
1700        o._value = co.value
1701        o.names.append(co.name)
1702        o._covobs[co.name] = co
1703        o._dvalue = np.sqrt(co.errsq())
1704        return o
1705
1706    ol = []
1707    if isinstance(means, (float, int)):
1708        means = [means]
1709
1710    for i in range(len(means)):
1711        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1712    if ol[0].covobs[name].N != len(means):
1713        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1714    if len(ol) == 1:
1715        return ol[0]
1716    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.