pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. The class also contains
  27    additional methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
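
            Examples
            --------
            A minimal sketch of constructing an Obs from synthetic data; the
            ensemble name 'ens1' and the sample values are made up for
            illustration.

            >>> import numpy as np
            >>> import pyerrors as pe
            >>> samples = np.random.default_rng(0).normal(1.0, 0.01, 100)
            >>> my_obs = pe.Obs([samples], ['ens1'])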
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
 108                    elif np.any(dc == 0):
 109                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
 110                    if len(dc) == 1:
 111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 112                    else:
 113                        self.idl[name] = list(idx)
 114                else:
 115                    raise TypeError('incompatible type for idl[%s].' % (name))
 116        else:
 117            for name, sample in sorted(zip(names, samples)):
 118                self.idl[name] = range(1, len(sample) + 1)
 119
 120        if kwargs.get("means") is not None:
 121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                self.r_values[name] = mean
 125                self.deltas[name] = sample
 126        else:
 127            for name, sample in sorted(zip(names, samples)):
 128                self.shape[name] = len(self.idl[name])
 129                self.N += self.shape[name]
 130                if len(sample) != self.shape[name]:
 131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 132                self.r_values[name] = np.mean(sample)
 133                self.deltas[name] = sample - self.r_values[name]
 134                self._value += self.shape[name] * self.r_values[name]
 135            self._value /= self.N
 136
 137        self._dvalue = 0.0
 138        self.ddvalue = 0.0
 139        self.reweighted = False
 140
 141        self.tag = None
 142
 143    @property
 144    def value(self):
 145        return self._value
 146
 147    @property
 148    def dvalue(self):
 149        return self._dvalue
 150
 151    @property
 152    def e_names(self):
 153        return sorted(set([o.split('|')[0] for o in self.names]))
 154
 155    @property
 156    def cov_names(self):
 157        return sorted(set([o for o in self.covobs.keys()]))
 158
 159    @property
 160    def mc_names(self):
 161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 162
 163    @property
 164    def e_content(self):
 165        res = {}
 166        for e, e_name in enumerate(self.e_names):
 167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 168            if e_name in self.names:
 169                res[e_name].append(e_name)
 170        return res
 171
 172    @property
 173    def covobs(self):
 174        return self._covobs
 175
 176    def gamma_method(self, **kwargs):
 177        """Estimate the error and related properties of the Obs.
 178
 179        Parameters
 180        ----------
 181        S : float
 182            specifies a custom value for the parameter S (default 2.0).
 183            If set to 0 it is assumed that the data exhibits no
 184            autocorrelation. In this case the error estimate coincides
 185            with the sample standard error.
 186        tau_exp : float
 187            positive value triggers the critical slowing down analysis
 188            (default 0.0).
 189        N_sigma : float
 190            number of standard deviations from zero until the tail is
 191            attached to the autocorrelation function (default 1).
 192        fft : bool
 193            determines whether the fft algorithm is used for the computation
 194            of the autocorrelation function (default True)
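
            Examples
            --------
            A sketch of a typical error analysis; ``my_obs`` is assumed to be
            an existing Obs instance.

            >>> my_obs.gamma_method()
            >>> my_obs.gm(S=3.0)  # shorthand alias with a custom value for S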
 195        """
 196
 197        e_content = self.e_content
 198        self.e_dvalue = {}
 199        self.e_ddvalue = {}
 200        self.e_tauint = {}
 201        self.e_dtauint = {}
 202        self.e_windowsize = {}
 203        self.e_n_tauint = {}
 204        self.e_n_dtauint = {}
 205        e_gamma = {}
 206        self.e_rho = {}
 207        self.e_drho = {}
 208        self._dvalue = 0
 209        self.ddvalue = 0
 210
 211        self.S = {}
 212        self.tau_exp = {}
 213        self.N_sigma = {}
 214
 215        if kwargs.get('fft') is False:
 216            fft = False
 217        else:
 218            fft = True
 219
 220        def _parse_kwarg(kwarg_name):
 221            if kwarg_name in kwargs:
 222                tmp = kwargs.get(kwarg_name)
 223                if isinstance(tmp, (int, float)):
 224                    if tmp < 0:
 225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 226                    for e, e_name in enumerate(self.e_names):
 227                        getattr(self, kwarg_name)[e_name] = tmp
 228                else:
 229                    raise TypeError(kwarg_name + ' is not in proper format.')
 230            else:
 231                for e, e_name in enumerate(self.e_names):
 232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 234                    else:
 235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 236
 237        _parse_kwarg('S')
 238        _parse_kwarg('tau_exp')
 239        _parse_kwarg('N_sigma')
 240
 241        for e, e_name in enumerate(self.mc_names):
 242            gapsize = _determine_gap(self, e_content, e_name)
 243
 244            r_length = []
 245            for r_name in e_content[e_name]:
 246                if isinstance(self.idl[r_name], range):
 247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 248                else:
 249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 250
 251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 252            w_max = max(r_length) // 2
 253            e_gamma[e_name] = np.zeros(w_max)
 254            self.e_rho[e_name] = np.zeros(w_max)
 255            self.e_drho[e_name] = np.zeros(w_max)
 256
 257            for r_name in e_content[e_name]:
 258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 259
 260            gamma_div = np.zeros(w_max)
 261            for r_name in e_content[e_name]:
 262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 263            gamma_div[gamma_div < 1] = 1.0
 264            e_gamma[e_name] /= gamma_div[:w_max]
 265
 266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 267                self.e_tauint[e_name] = 0.5
 268                self.e_dtauint[e_name] = 0.0
 269                self.e_dvalue[e_name] = 0.0
 270                self.e_ddvalue[e_name] = 0.0
 271                self.e_windowsize[e_name] = 0
 272                continue
 273
 274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 276            # Make sure no entry of tauint is smaller than 0.5
 277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 278            # hep-lat/0306017 eq. (42)
 279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 280            self.e_n_dtauint[e_name][0] = 0.0
 281
 282            def _compute_drho(i):
 283                tmp = (self.e_rho[e_name][i + 1:w_max]
 284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 288
 289            if self.tau_exp[e_name] > 0:
 290                _compute_drho(1)
 291                texp = self.tau_exp[e_name]
 292                # Critical slowing down analysis
 293                if w_max // 2 <= 1:
 294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 295                for n in range(1, w_max // 2):
 296                    _compute_drho(n + 1)
 297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 298                        # Bias correction hep-lat/0306017 eq. (49) included
 299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
 300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 304                        self.e_windowsize[e_name] = n
 305                        break
 306            else:
 307                if self.S[e_name] == 0.0:
 308                    self.e_tauint[e_name] = 0.5
 309                    self.e_dtauint[e_name] = 0.0
 310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 312                    self.e_windowsize[e_name] = 0
 313                else:
 314                    # Standard automatic windowing procedure
 315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 317                    for n in range(1, w_max):
 318                        if g_w[n - 1] < 0 or n >= w_max - 1:
 319                            _compute_drho(n)
 320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 324                            self.e_windowsize[e_name] = n
 325                            break
 326
 327            self._dvalue += self.e_dvalue[e_name] ** 2
 328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 329
 330        for e_name in self.cov_names:
 331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 332            self.e_ddvalue[e_name] = 0
 333            self._dvalue += self.e_dvalue[e_name]**2
 334
 335        self._dvalue = np.sqrt(self._dvalue)
 336        if self._dvalue == 0.0:
 337            self.ddvalue = 0.0
 338        else:
 339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 340        return
 341
 342    gm = gamma_method
 343
 344    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 345        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 346           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 347
 348        Parameters
 349        ----------
 350        deltas : list
 351            List of fluctuations
 352        idx : list
 353            List or range of configurations on which the deltas are defined.
 354        shape : int
 355            Number of configurations in idx.
 356        w_max : int
 357            Upper bound for the summation window.
 358        fft : bool
 359            determines whether the fft algorithm is used for the computation
 360            of the autocorrelation function.
 361        gapsize : int
 362            The target distance between two configurations. If longer distances
 363            are found in idx, the data is expanded.
 364        """
 365        gamma = np.zeros(w_max)
 366        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 367        new_shape = len(deltas)
 368        if fft:
 369            max_gamma = min(new_shape, w_max)
 370            # The padding for the fft has to be even
 371            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 372            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 373        else:
 374            for n in range(w_max):
 375                if new_shape - n >= 0:
 376                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 377
 378        return gamma
 379
 380    def details(self, ens_content=True):
 381        """Output detailed properties of the Obs.
 382
 383        Parameters
 384        ----------
 385        ens_content : bool
 386            print details about the ensembles and replica if true.
 387        """
 388        if self.tag is not None:
 389            print("Description:", self.tag)
 390        if not hasattr(self, 'e_dvalue'):
 391            print('Result\t %3.8e' % (self.value))
 392        else:
 393            if self.value == 0.0:
 394                percentage = np.nan
 395            else:
 396                percentage = np.abs(self._dvalue / self.value) * 100
 397            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 398            if len(self.e_names) > 1:
 399                print(' Ensemble errors:')
 400            e_content = self.e_content
 401            for e_name in self.mc_names:
 402                gap = _determine_gap(self, e_content, e_name)
 403
 404                if len(self.e_names) > 1:
 405                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 406                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 407                tau_string += f" in units of {gap} config"
 408                if gap > 1:
 409                    tau_string += "s"
 410                if self.tau_exp[e_name] > 0:
 411                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 412                else:
 413                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 414                print(tau_string)
 415            for e_name in self.cov_names:
 416                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 417        if ens_content is True:
 418            if len(self.e_names) == 1:
 419                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 420            else:
 421                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 422            my_string_list = []
 423            for key, value in sorted(self.e_content.items()):
 424                if key not in self.covobs:
 425                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 426                    if len(value) == 1:
 427                        my_string += f': {self.shape[value[0]]} configurations'
 428                        if isinstance(self.idl[value[0]], range):
 429                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 430                        else:
 431                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 432                    else:
 433                        sublist = []
 434                        for v in value:
 435                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 436                            my_substring += f': {self.shape[v]} configurations'
 437                            if isinstance(self.idl[v], range):
 438                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 439                            else:
 440                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 441                            sublist.append(my_substring)
 442
 443                        my_string += '\n' + '\n'.join(sublist)
 444                else:
 445                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 446                my_string_list.append(my_string)
 447            print('\n'.join(my_string_list))
 448
 449    def reweight(self, weight):
 450        """Reweight the obs with a given reweighting factor.
 451
 452        Parameters
 453        ----------
 454        weight : Obs
 455            Reweighting factor. An Observable that has to be defined on a superset of the
 456            configurations in self.idl.
 461        """
 462        return reweight(weight, [self])[0]
 463
 464    def is_zero_within_error(self, sigma=1):
 465        """Checks whether the observable is zero within 'sigma' standard errors.
 466
 467        Parameters
 468        ----------
 469        sigma : int
 470            Number of standard errors used for the check.
 471
 472        Only works properly if the gamma method has been run.
 473        """
 474        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 475
 476    def is_zero(self, atol=1e-10):
 477        """Checks whether the observable is zero within a given tolerance.
 478
 479        Parameters
 480        ----------
 481        atol : float
 482            Absolute tolerance (for details see numpy documentation).
 483        """
 484        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 485
 486    def plot_tauint(self, save=None):
 487        """Plot integrated autocorrelation time for each ensemble.
 488
 489        Parameters
 490        ----------
 491        save : str
 492            saves the figure to a file named 'save' if a path is specified.
 493        """
 494        if not hasattr(self, 'e_dvalue'):
 495            raise Exception('Run the gamma method first.')
 496
 497        for e, e_name in enumerate(self.mc_names):
 498            fig = plt.figure()
 499            plt.xlabel(r'$W$')
 500            plt.ylabel(r'$\tau_\mathrm{int}$')
 501            length = int(len(self.e_n_tauint[e_name]))
 502            if self.tau_exp[e_name] > 0:
 503                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 504                x_help = np.arange(2 * self.tau_exp[e_name])
 505                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 506                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 507                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 508                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 509                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 510                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 511                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 512            else:
 513                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 515
 516            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 517            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 518            plt.legend()
 519            plt.xlim(-0.5, xmax)
 520            ylim = plt.ylim()
 521            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 522            plt.draw()
 523            if save:
 524                fig.savefig(save + "_" + str(e))
 525
 526    def plot_rho(self, save=None):
 527        """Plot normalized autocorrelation function for each ensemble.
 528
 529        Parameters
 530        ----------
 531        save : str
 532            saves the figure to a file named 'save' if a path is specified.
 533        """
 534        if not hasattr(self, 'e_dvalue'):
 535            raise Exception('Run the gamma method first.')
 536        for e, e_name in enumerate(self.mc_names):
 537            fig = plt.figure()
 538            plt.xlabel('W')
 539            plt.ylabel('rho')
 540            length = int(len(self.e_drho[e_name]))
 541            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 542            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 543            if self.tau_exp[e_name] > 0:
 544                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 545                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 546                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 547                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 548            else:
 549                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 550                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 551            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 552            plt.xlim(-0.5, xmax)
 553            plt.draw()
 554            if save:
 555                fig.savefig(save + "_" + str(e))
 556
 557    def plot_rep_dist(self):
 558        """Plot replica distribution for each ensemble with more than one replicum."""
 559        if not hasattr(self, 'e_dvalue'):
 560            raise Exception('Run the gamma method first.')
 561        for e, e_name in enumerate(self.mc_names):
 562            if len(self.e_content[e_name]) == 1:
 563                print('No replica distribution for a single replicum (', e_name, ')')
 564                continue
 565            r_length = []
 566            sub_r_mean = 0
 567            for r, r_name in enumerate(self.e_content[e_name]):
 568                r_length.append(len(self.deltas[r_name]))
 569                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 570            e_N = np.sum(r_length)
 571            sub_r_mean /= e_N
 572            arr = np.zeros(len(self.e_content[e_name]))
 573            for r, r_name in enumerate(self.e_content[e_name]):
 574                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 575            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 576            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
 577            plt.draw()
 578
 579    def plot_history(self, expand=True):
 580        """Plot derived Monte Carlo history for each ensemble.
 581
 582        Parameters
 583        ----------
 584        expand : bool
 585            show expanded history for irregular Monte Carlo chains (default: True).
 586        """
 587        for e, e_name in enumerate(self.mc_names):
 588            plt.figure()
 589            r_length = []
 590            tmp = []
 591            tmp_expanded = []
 592            for r, r_name in enumerate(self.e_content[e_name]):
 593                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 594                if expand:
 595                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 596                    r_length.append(len(tmp_expanded[-1]))
 597                else:
 598                    r_length.append(len(tmp[-1]))
 599            e_N = np.sum(r_length)
 600            x = np.arange(e_N)
 601            y_test = np.concatenate(tmp, axis=0)
 602            if expand:
 603                y = np.concatenate(tmp_expanded, axis=0)
 604            else:
 605                y = y_test
 606            plt.errorbar(x, y, fmt='.', markersize=3)
 607            plt.xlim(-0.5, e_N - 0.5)
 608            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 609            plt.draw()
 610
 611    def plot_piechart(self, save=None):
 612        """Plot a pie chart which shows the fractional contribution of each
 613        ensemble to the total error and return a dictionary containing the fractions.
 614
 615        Parameters
 616        ----------
 617        save : str
 618            saves the figure to a file named 'save' if a path is specified.
 619        """
 620        if not hasattr(self, 'e_dvalue'):
 621            raise Exception('Run the gamma method first.')
 622        if np.isclose(0.0, self._dvalue, atol=1e-15):
 623            raise Exception('Error is 0.0')
 624        labels = self.e_names
 625        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 626        fig1, ax1 = plt.subplots()
 627        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 628        ax1.axis('equal')
 629        plt.draw()
 630        if save:
 631            fig1.savefig(save)
 632
 633        return dict(zip(labels, sizes))
 634
 635    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 636        """Dump the Obs to a file 'filename' of chosen format.
 637
 638        Parameters
 639        ----------
 640        filename : str
 641            name of the file to be saved.
 642        datatype : str
 643            Format of the exported file. Supported formats include
 644            "json.gz" and "pickle"
 645        description : str
 646            Description for output file, only relevant for json.gz format.
 647        path : str
 648            specifies a custom path for the file (default '.')
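
            Examples
            --------
            A sketch; the file name is made up and the call writes a
            'my_obs.json.gz' file to the current working directory.

            >>> my_obs.dump("my_obs", datatype="json.gz")  # doctest: +SKIP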
 649        """
 650        if 'path' in kwargs:
 651            file_name = kwargs.get('path') + '/' + filename
 652        else:
 653            file_name = filename
 654
 655        if datatype == "json.gz":
 656            from .input.json import dump_to_json
 657            dump_to_json([self], file_name, description=description)
 658        elif datatype == "pickle":
 659            with open(file_name + '.p', 'wb') as fb:
 660                pickle.dump(self, fb)
 661        else:
 662            raise Exception("Unknown datatype " + str(datatype))
 663
 664    def export_jackknife(self):
 665        """Export jackknife samples from the Obs
 666
 667        Returns
 668        -------
 669        numpy.ndarray
 670            Returns a numpy array of length N + 1 where N is the number of samples
 671            for the given ensemble and replicum. The zeroth entry of the array contains
 672            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 673            derived from the Obs. The current implementation only works for observables
 674            defined on exactly one ensemble and replicum. The derived jackknife samples
 675            should agree with samples from a full jackknife analysis up to O(1/N).
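
            Examples
            --------
            A sketch for an Obs defined on a single ensemble; the zeroth entry
            holds the mean value.

            >>> jacks = my_obs.export_jackknife()
            >>> jacks.shape == (my_obs.N + 1,)
            True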
 676        """
 677
 678        if len(self.names) != 1:
 679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 680
 681        name = self.names[0]
 682        full_data = self.deltas[name] + self.r_values[name]
 683        n = full_data.size
 684        mean = self.value
 685        tmp_jacks = np.zeros(n + 1)
 686        tmp_jacks[0] = mean
 687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 688        return tmp_jacks
 689
 690    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 691        """Export bootstrap samples from the Obs
 692
 693        Parameters
 694        ----------
 695        samples : int
 696            Number of bootstrap samples to generate.
 697        random_numbers : np.ndarray
 698            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 699            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
 700        save_rng : str
 701            Save the random numbers to a file if a path is specified.
 702
 703        Returns
 704        -------
 705        numpy.ndarray
 706            Returns a numpy array of length samples + 1 where samples is the number of
 707            bootstrap samples. The zeroth entry of the array contains
 708            the mean value of the Obs, entries 1 to samples contain the bootstrap samples
 709            derived from the Obs. The current implementation only works for observables
 710            defined on exactly one ensemble and replicum. The derived bootstrap samples
 711            should agree with samples from a full bootstrap analysis up to O(1/N).
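
            Examples
            --------
            A sketch for an Obs defined on a single ensemble; with the default
            seeding the samples are reproducible.

            >>> boots = my_obs.export_bootstrap(samples=1000)
            >>> boots.shape
            (1001,)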
 712        """
 713        if len(self.names) != 1:
 714            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")
 715
 716        name = self.names[0]
 717        length = self.N
 718
 719        if random_numbers is None:
 720            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 721            rng = np.random.default_rng(seed)
 722            random_numbers = rng.integers(0, length, size=(samples, length))
 723
 724        if save_rng is not None:
 725            np.savetxt(save_rng, random_numbers, fmt='%i')
 726
 727        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 728        ret = np.zeros(samples + 1)
 729        ret[0] = self.value
 730        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 731        return ret
 732
 733    def __float__(self):
 734        return float(self.value)
 735
 736    def __repr__(self):
 737        return 'Obs[' + str(self) + ']'
 738
 739    def __str__(self):
 740        return _format_uncertainty(self.value, self._dvalue)
 741
 742    def __format__(self, format_type):
 743        if format_type == "":
 744            significance = 2
 745        else:
 746            significance = int(float(format_type.replace("+", "").replace("-", "")))
 747        my_str = _format_uncertainty(self.value, self._dvalue,
 748                                     significance=significance)
 749        for char in ["+", " "]:
 750            if format_type.startswith(char):
 751                if my_str[0] != "-":
 752                    my_str = char + my_str
 753        return my_str
 754
 755    def __hash__(self):
 756        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 757        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 758        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 759        hash_tuple += tuple([o.encode() for o in self.names])
 760        m = hashlib.md5()
 761        [m.update(o) for o in hash_tuple]
 762        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 763
 764    # Overload comparisons
 765    def __lt__(self, other):
 766        return self.value < other
 767
 768    def __le__(self, other):
 769        return self.value <= other
 770
 771    def __gt__(self, other):
 772        return self.value > other
 773
 774    def __ge__(self, other):
 775        return self.value >= other
 776
 777    def __eq__(self, other):
 778        if other is None:
 779            return False
 780        return (self - other).is_zero()
 781
 782    # Overload math operations
 783    def __add__(self, y):
 784        if isinstance(y, Obs):
 785            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 786        else:
 787            if isinstance(y, np.ndarray):
 788                return np.array([self + o for o in y])
 789            elif isinstance(y, complex):
 790                return CObs(self, 0) + y
 791            elif y.__class__.__name__ in ['Corr', 'CObs']:
 792                return NotImplemented
 793            else:
 794                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 795
 796    def __radd__(self, y):
 797        return self + y
 798
 799    def __mul__(self, y):
 800        if isinstance(y, Obs):
 801            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 802        else:
 803            if isinstance(y, np.ndarray):
 804                return np.array([self * o for o in y])
 805            elif isinstance(y, complex):
 806                return CObs(self * y.real, self * y.imag)
 807            elif y.__class__.__name__ in ['Corr', 'CObs']:
 808                return NotImplemented
 809            else:
 810                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 811
 812    def __rmul__(self, y):
 813        return self * y
 814
 815    def __sub__(self, y):
 816        if isinstance(y, Obs):
 817            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 818        else:
 819            if isinstance(y, np.ndarray):
 820                return np.array([self - o for o in y])
 821            elif y.__class__.__name__ in ['Corr', 'CObs']:
 822                return NotImplemented
 823            else:
 824                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 825
 826    def __rsub__(self, y):
 827        return -1 * (self - y)
 828
 829    def __pos__(self):
 830        return self
 831
 832    def __neg__(self):
 833        return -1 * self
 834
 835    def __truediv__(self, y):
 836        if isinstance(y, Obs):
 837            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 838        else:
 839            if isinstance(y, np.ndarray):
 840                return np.array([self / o for o in y])
 841            elif y.__class__.__name__ in ['Corr', 'CObs']:
 842                return NotImplemented
 843            else:
 844                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 845
 846    def __rtruediv__(self, y):
 847        if isinstance(y, Obs):
 848            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 849        else:
 850            if isinstance(y, np.ndarray):
 851                return np.array([o / self for o in y])
 852            elif y.__class__.__name__ in ['Corr', 'CObs']:
 853                return NotImplemented
 854            else:
 855                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 856
 857    def __pow__(self, y):
 858        if isinstance(y, Obs):
 859            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 860        else:
 861            return derived_observable(lambda x: x[0] ** y, [self])
 862
 863    def __rpow__(self, y):
 864        if isinstance(y, Obs):
 865            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 866        else:
 867            return derived_observable(lambda x: y ** x[0], [self])
 868
 869    def __abs__(self):
 870        return derived_observable(lambda x: anp.abs(x[0]), [self])
 871
 872    # Overload numpy functions
 873    def sqrt(self):
 874        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 875
 876    def log(self):
 877        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 878
 879    def exp(self):
 880        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 881
 882    def sin(self):
 883        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 884
 885    def cos(self):
 886        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 887
 888    def tan(self):
 889        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 890
 891    def arcsin(self):
 892        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 893
 894    def arccos(self):
 895        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 896
 897    def arctan(self):
 898        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 899
 900    def sinh(self):
 901        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 902
 903    def cosh(self):
 904        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 905
 906    def tanh(self):
 907        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 908
 909    def arcsinh(self):
 910        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 911
 912    def arccosh(self):
 913        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 914
 915    def arctanh(self):
 916        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 917
 918
 919class CObs:
 920    """Class for a complex valued observable."""
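        # A minimal sketch of typical usage; obs_re and obs_im are assumed to
        # be existing Obs instances:
        #     c_obs = CObs(obs_re, obs_im)
        #     c_obs.gamma_method()
        #     print(c_obs.real, c_obs.imag)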
 921    __slots__ = ['_real', '_imag', 'tag']
 922
 923    def __init__(self, real, imag=0.0):
 924        self._real = real
 925        self._imag = imag
 926        self.tag = None
 927
 928    @property
 929    def real(self):
 930        return self._real
 931
 932    @property
 933    def imag(self):
 934        return self._imag
 935
 936    def gamma_method(self, **kwargs):
 937        """Executes the gamma_method for the real and the imaginary part."""
 938        if isinstance(self.real, Obs):
 939            self.real.gamma_method(**kwargs)
 940        if isinstance(self.imag, Obs):
 941            self.imag.gamma_method(**kwargs)
 942
 943    def is_zero(self):
 944        """Checks whether both real and imaginary part are zero within machine precision."""
 945        return self.real == 0.0 and self.imag == 0.0
 946
 947    def conjugate(self):
 948        return CObs(self.real, -self.imag)
 949
 950    def __add__(self, other):
 951        if isinstance(other, np.ndarray):
 952            return other + self
 953        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 954            return CObs(self.real + other.real,
 955                        self.imag + other.imag)
 956        else:
 957            return CObs(self.real + other, self.imag)
 958
 959    def __radd__(self, y):
 960        return self + y
 961
 962    def __sub__(self, other):
 963        if isinstance(other, np.ndarray):
 964            return -1 * (other - self)
 965        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 966            return CObs(self.real - other.real, self.imag - other.imag)
 967        else:
 968            return CObs(self.real - other, self.imag)
 969
 970    def __rsub__(self, other):
 971        return -1 * (self - other)
 972
 973    def __mul__(self, other):
 974        if isinstance(other, np.ndarray):
 975            return other * self
 976        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 977            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 978                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 981                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 982                                               [self.real, other.real, self.imag, other.imag],
 983                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 984            elif getattr(other, 'imag', 0) != 0:
 985                return CObs(self.real * other.real - self.imag * other.imag,
 986                            self.imag * other.real + self.real * other.imag)
 987            else:
 988                return CObs(self.real * other.real, self.imag * other.real)
 989        else:
 990            return CObs(self.real * other, self.imag * other)
 991
 992    def __rmul__(self, other):
 993        return self * other
 994
 995    def __truediv__(self, other):
 996        if isinstance(other, np.ndarray):
 997            return 1 / (other / self)
 998        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 999            r = other.real ** 2 + other.imag ** 2
1000            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1001        else:
1002            return CObs(self.real / other, self.imag / other)
1003
1004    def __rtruediv__(self, other):
1005        r = self.real ** 2 + self.imag ** 2
1006        if hasattr(other, 'real') and hasattr(other, 'imag'):
1007            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1008        else:
1009            return CObs(self.real * other / r, -self.imag * other / r)
1010
1011    def __abs__(self):
1012        return np.sqrt(self.real**2 + self.imag**2)
1013
1014    def __pos__(self):
1015        return self
1016
1017    def __neg__(self):
1018        return -1 * self
1019
1020    def __eq__(self, other):
1021        return self.real == other.real and self.imag == other.imag
1022
1023    def __str__(self):
1024        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1025
1026    def __repr__(self):
1027        return 'CObs[' + str(self) + ']'
1028
1029    def __format__(self, format_type):
1030        if format_type == "":
1031            significance = 2
1032            format_type = "2"
1033        else:
1034            significance = int(float(format_type.replace("+", "").replace("-", "")))
1035        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1036
1037
1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
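
        Examples
        --------
        A sketch; obs1 and obs2 are assumed to be existing Obs instances.

        >>> gamma_method([obs1, obs2], S=3.0)  # doctest: +SKIP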
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1044
1045
1046gm = gamma_method
1047
1048
1049def _format_uncertainty(value, dvalue, significance=2):
1050    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
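        # Illustrative values, consistent with the branching on fexp below:
        #   _format_uncertainty(13.02, 0.45)  -> '13.02(45)'
        #   _format_uncertainty(1.234, 0.056) -> '1.234(56)'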
1051    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1052        return str(value)
1053    if not isinstance(significance, int):
1054        raise TypeError("significance needs to be an integer.")
1055    if significance < 1:
1056        raise ValueError("significance needs to be larger than zero.")
1057    fexp = np.floor(np.log10(dvalue))
1058    if fexp < 0.0:
1059        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1060    elif fexp == 0.0:
1061        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1062    else:
1063        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1064
1065
1066def _expand_deltas(deltas, idx, shape, gapsize):
1067    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1068       configurations and where holes are filled by 0.
1069       If idx is of type range, the deltas are not changed if idx.step == gapsize.
1070
1071    Parameters
1072    ----------
1073    deltas : list
1074        List of fluctuations
1075    idx : list
1076        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1077    shape : int
1078        Number of configs in idx.
1079    gapsize : int
1080        The target distance between two configurations. If longer distances
1081        are found in idx, the data is expanded.
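
        Examples
        --------
        An illustrative case in which configuration 3 is missing from idx and
        its delta is filled with a zero:

        >>> _expand_deltas(np.array([1., 2., 3.]), [1, 2, 4], 3, 1)
        array([1., 2., 0., 3.])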
1082    """
1083    if isinstance(idx, range):
1084        if (idx.step == gapsize):
1085            return deltas
1086    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1087    for i in range(shape):
1088        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1089    return ret
1090
1091
1092def _merge_idx(idl):
1093    """Returns the union of all lists in idl as range or sorted list
1094
1095    Parameters
1096    ----------
1097    idl : list
1098        List of lists or ranges.
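
        Examples
        --------
        An illustrative merge of two overlapping ranges:

        >>> _merge_idx([range(1, 11), range(5, 15)])
        range(1, 15)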
1099    """
1100
1101    if _check_lists_equal(idl):
1102        return idl[0]
1103
1104    idunion = sorted(set().union(*idl))
1105
1106    # Check whether idunion can be expressed as range
1107    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1108    idtest = [list(idrange), idunion]
1109    if _check_lists_equal(idtest):
1110        return idrange
1111
1112    return idunion
1113
1114
1115def _intersection_idx(idl):
1116    """Returns the intersection of all lists in idl as range or sorted list
1117
1118    Parameters
1119    ----------
1120    idl : list
1121        List of lists or ranges.
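
        Examples
        --------
        An illustrative intersection of two overlapping ranges:

        >>> _intersection_idx([range(1, 11), range(5, 15)])
        range(5, 11)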
1122    """
1123
1124    if _check_lists_equal(idl):
1125        return idl[0]
1126
1127    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1128
1129    # Check whether idinter can be expressed as range
1130    try:
1131        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1132        idtest = [list(idrange), idinter]
1133        if _check_lists_equal(idtest):
1134            return idrange
1135    except IndexError:
1136        pass
1137
1138    return idinter
1139
1140
1141def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1142    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1143       New, empty entries are filled by 0. If idx and new_idx are identical
1144       ranges, the deltas are returned unchanged.
1145
1146    Parameters
1147    ----------
1148    deltas : list
1149        List of fluctuations
1150    idx : list
1151        List or range of configs on which the deltas are defined.
1152        Has to be a subset of new_idx and has to be sorted in ascending order.
1153    shape : list
1154        Number of configs in idx.
1155    new_idx : list
1156        List of configs that defines the new range, has to be sorted in ascending order.
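
        Examples
        --------
        An illustrative case; note the rescaling of the deltas by
        len(new_idx) / len(idx):

        >>> _expand_deltas_for_merge(np.array([2., 2.]), [1, 3], 2, [1, 2, 3])
        array([3., 0., 3.])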
1157    """
1158
1159    if type(idx) is range and type(new_idx) is range:
1160        if idx == new_idx:
1161            return deltas
1162    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1163    for i in range(shape):
1164        ret[idx[i] - new_idx[0]] = deltas[i]
1165    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1166
1167
1168def derived_observable(func, data, array_mode=False, **kwargs):
1169    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1170
1171    Parameters
1172    ----------
1173    func : object
1174        arbitrary function of the form func(data, **kwargs). For the
1175        automatic differentiation to work, all numpy functions have to have
1176        the autograd wrapper (use 'import autograd.numpy as anp').
1177    data : list
1178        list of Obs, e.g. [obs1, obs2, obs3].
1179    num_grad : bool
1180        if True, numerical derivatives are used instead of autograd
1181        (default False). To control the numerical differentiation the
1182        kwargs of numdifftools.step_generators.MaxStepGenerator
1183        can be used.
1184    man_grad : list
1185        manually supply a list or an array which contains the jacobian
1186        of func. Use cautiously, supplying the wrong derivative will
1187        not be intercepted.
1188
1189    Notes
1190    -----
1191    For simple mathematical operations it can be practical to use anonymous
1192    functions. For the ratio of two observables one can e.g. use
1193
1194    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
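
        For performance critical applications the Jacobian can be supplied
        manually via man_grad, e.g. for the same ratio (a sketch, mirroring the
        gradients used by the overloaded division operator above):

        new_obs = derived_observable(lambda x, **kwargs: x[0] / x[1], [obs1, obs2],
                                     man_grad=[1 / obs2.value, -obs1.value / obs2.value ** 2])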
1195    """
1196
1197    data = np.asarray(data)
1198    raveled_data = data.ravel()
1199
1200    # Workaround for matrix operations containing non Obs data
1201    if not all(isinstance(x, Obs) for x in raveled_data):
1202        for i in range(len(raveled_data)):
1203            if isinstance(raveled_data[i], (int, float)):
1204                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1205
1206    allcov = {}
1207    for o in raveled_data:
1208        for name in o.cov_names:
1209            if name in allcov:
1210                if not np.allclose(allcov[name], o.covobs[name].cov):
1211                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1212            else:
1213                allcov[name] = o.covobs[name].cov
1214
1215    n_obs = len(raveled_data)
1216    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1217    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1218    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1219
1220    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1221
1222    if data.ndim == 1:
1223        values = np.array([o.value for o in data])
1224    else:
1225        values = np.vectorize(lambda x: x.value)(data)
1226
1227    new_values = func(values, **kwargs)
1228
1229    multi = int(isinstance(new_values, np.ndarray))
1230
1231    new_r_values = {}
1232    new_idl_d = {}
1233    for name in new_sample_names:
1234        idl = []
1235        tmp_values = np.zeros(n_obs)
1236        for i, item in enumerate(raveled_data):
1237            tmp_values[i] = item.r_values.get(name, item.value)
1238            tmp_idl = item.idl.get(name)
1239            if tmp_idl is not None:
1240                idl.append(tmp_idl)
1241        if multi > 0:
1242            tmp_values = np.array(tmp_values).reshape(data.shape)
1243        new_r_values[name] = func(tmp_values, **kwargs)
1244        new_idl_d[name] = _merge_idx(idl)
1245
1246    if 'man_grad' in kwargs:
1247        deriv = np.asarray(kwargs.get('man_grad'))
1248        if new_values.shape + data.shape != deriv.shape:
1249            raise Exception('Manual derivative does not have correct shape.')
1250    elif kwargs.get('num_grad') is True:
1251        if multi > 0:
1252            raise Exception('Multi mode currently not supported for numerical derivative')
1253        options = {
1254            'base_step': 0.1,
1255            'step_ratio': 2.5}
1256        for key in options.keys():
1257            kwarg = kwargs.get(key)
1258            if kwarg is not None:
1259                options[key] = kwarg
1260        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1261        if tmp_df.size == 1:
1262            deriv = np.array([tmp_df.real])
1263        else:
1264            deriv = tmp_df.real
1265    else:
1266        deriv = jacobian(func)(values, **kwargs)
1267
1268    final_result = np.zeros(new_values.shape, dtype=object)
1269
1270    if array_mode is True:
1271
1272        class _Zero_grad():
1273            def __init__(self, N):
1274                self.grad = np.zeros((N, 1))
1275
1276        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1277        d_extracted = {}
1278        g_extracted = {}
1279        for name in new_sample_names:
1280            d_extracted[name] = []
1281            ens_length = len(new_idl_d[name])
1282            for i_dat, dat in enumerate(data):
1283                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1284        for name in new_cov_names:
1285            g_extracted[name] = []
1286            zero_grad = _Zero_grad(new_covobs_lengths[name])
1287            for i_dat, dat in enumerate(data):
1288                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1289
1290    for i_val, new_val in np.ndenumerate(new_values):
1291        new_deltas = {}
1292        new_grad = {}
1293        if array_mode is True:
1294            for name in new_sample_names:
1295                ens_length = d_extracted[name][0].shape[-1]
1296                new_deltas[name] = np.zeros(ens_length)
1297                for i_dat, dat in enumerate(d_extracted[name]):
1298                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1299            for name in new_cov_names:
1300                new_grad[name] = 0
1301                for i_dat, dat in enumerate(g_extracted[name]):
1302                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1303        else:
1304            for j_obs, obs in np.ndenumerate(data):
1305                for name in obs.names:
1306                    if name in obs.cov_names:
1307                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1308                    else:
1309                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1310
1311        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1312
1313        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1314            raise Exception('The same name has been used for deltas and covobs!')
1315        new_samples = []
1316        new_means = []
1317        new_idl = []
1318        new_names_obs = []
1319        for name in new_names:
1320            if name not in new_covobs:
1321                new_samples.append(new_deltas[name])
1322                new_idl.append(new_idl_d[name])
1323                new_means.append(new_r_values[name][i_val])
1324                new_names_obs.append(name)
1325        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1326        for name in new_covobs:
1327            final_result[i_val].names.append(name)
1328        final_result[i_val]._covobs = new_covobs
1329        final_result[i_val]._value = new_val
1330        final_result[i_val].reweighted = reweighted
1331
1332    if multi == 0:
1333        final_result = final_result.item()
1334
1335    return final_result
1336
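# Usage sketch, assuming obs1 and obs2 are Obs instances defined on compatible
# configurations: the same ratio via autograd, via numerical differentiation
# and via a manually supplied jacobian. Since man_grad/num_grad are passed on
# to func, the function has to accept **kwargs in these cases. array_mode=True
# pre-extracts the deltas of all inputs per ensemble, which can speed up
# functions mapping large arrays of Obs.
#
#     r_ad = derived_observable(lambda x, **kwargs: x[0] / x[1], [obs1, obs2])
#     r_num = derived_observable(lambda x, **kwargs: x[0] / x[1], [obs1, obs2],
#                                num_grad=True)
#     r_man = derived_observable(lambda x, **kwargs: x[0] / x[1], [obs1, obs2],
#                                man_grad=[1 / obs2.value,
#                                          -obs1.value / obs2.value ** 2])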
1337
1338def _reduce_deltas(deltas, idx_old, idx_new):
1339    """Extract deltas defined on idx_old on all configs of idx_new.
1340
1341    Assumes that idx_old and idx_new are correctly defined idl, i.e., that
1342    they are sorted in ascending order.
1343
1344    Parameters
1345    ----------
1346    deltas : list
1347        List of fluctuations
1348    idx_old : list
1349        List or range of configs on which the deltas are defined
1350    idx_new : list
1351        List of configs for which we want to extract the deltas.
1352        Has to be a subset of idx_old.
1353    """
1354    if len(deltas) != len(idx_old):
1355        raise Exception('Lengths of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1356    if type(idx_old) is range and type(idx_new) is range:
1357        if idx_old == idx_new:
1358            return deltas
1359    if _check_lists_equal([idx_old, idx_new]):
1360        return deltas
1361    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1362    if len(indices) < len(idx_new):
1363        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1364    return np.array(deltas)[indices]
1365
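# Worked example with made-up numbers: deltas defined on configs [1, 3, 5],
# reduced to the subset [1, 5]:
#
#     _reduce_deltas([0.1, -0.2, 0.1], [1, 3, 5], [1, 5])
#     # -> array([0.1, 0.1])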
1366
1367def reweight(weight, obs, **kwargs):
1368    """Reweight a list of observables.
1369
1370    Parameters
1371    ----------
1372    weight : Obs
1373        Reweighting factor. An Observable that has to be defined on a superset of the
1374        configurations in obs[i].idl for all i.
1375    obs : list
1376        list of Obs, e.g. [obs1, obs2, obs3].
1377    all_configs : bool
1378        if True, the reweighted observables are normalized by the average of
1379        the reweighting factor on all configurations in weight.idl and not
1380        on the configurations in obs[i].idl. Default False.
1381    """
1382    result = []
1383    for i in range(len(obs)):
1384        if len(obs[i].cov_names):
1385            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1386        if not set(obs[i].names).issubset(weight.names):
1387            raise Exception('Error: Ensembles do not fit')
1388        for name in obs[i].names:
1389            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1390                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1391        new_samples = []
1392        w_deltas = {}
1393        for name in sorted(obs[i].names):
1394            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1395            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1396        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1397
1398        if kwargs.get('all_configs'):
1399            new_weight = weight
1400        else:
1401            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1402
1403        result.append(tmp_obs / new_weight)
1404        result[-1].reweighted = True
1405
1406    return result
1407
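# Usage sketch, assuming w and my_obs are Obs on the same ensemble with
# my_obs.idl a subset of w.idl:
#
#     my_obs_rw = reweight(w, [my_obs])[0]   # equivalent to my_obs.reweight(w)
#     my_obs_rw.gamma_method()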
1408
1409def correlate(obs_a, obs_b):
1410    """Correlate two observables.
1411
1412    Parameters
1413    ----------
1414    obs_a : Obs
1415        First observable
1416    obs_b : Obs
1417        Second observable
1418
1419    Notes
1420    -----
1421    Keep in mind that only primary observables which have not been reweighted
1422    should be correlated. The reweighting has to be applied after correlating the observables.
1423    Currently this only works if the ensembles are identical (this restriction is not strictly necessary).
1424    """
1425
1426    if sorted(obs_a.names) != sorted(obs_b.names):
1427        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1428    if len(obs_a.cov_names) or len(obs_b.cov_names):
1429        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1430    for name in obs_a.names:
1431        if obs_a.shape[name] != obs_b.shape[name]:
1432            raise Exception(f'Shapes of ensemble {name} do not fit')
1433        if obs_a.idl[name] != obs_b.idl[name]:
1434            raise Exception(f'idl of ensemble {name} do not fit')
1435
1436    if obs_a.reweighted is True:
1437        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1438    if obs_b.reweighted is True:
1439        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1440
1441    new_samples = []
1442    new_idl = []
1443    for name in sorted(obs_a.names):
1444        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1445        new_idl.append(obs_a.idl[name])
1446
1447    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1448    o.reweighted = obs_a.reweighted or obs_b.reweighted
1449    return o
1450
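# Usage sketch, assuming obs_a and obs_b are unreweighted primary Obs defined
# on identical configurations:
#
#     ab = correlate(obs_a, obs_b)   # samples of a*b, needed e.g. for <ab> - <a><b>
#     ab.gamma_method()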
1451
1452def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1453    r'''Calculates the error covariance matrix of a set of observables.
1454
1455    WARNING: This function should be used with care, especially for observables with support on multiple
1456             ensembles with differing autocorrelations. See the notes below for details.
1457
1458    The gamma method has to be applied first to all observables.
1459
1460    Parameters
1461    ----------
1462    obs : list or numpy.ndarray
1463        List or one dimensional array of Obs
1464    visualize : bool
1465        If True plots the corresponding normalized correlation matrix (default False).
1466    correlation : bool
1467        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1468    smooth : None or int
1469        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1, the eigenvalue
1470        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the
1471        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
1472        small ones.
1473
1474    Notes
1475    -----
1476    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1477    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1478    in the absence of autocorrelation.
1479    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1480    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1481    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1482    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1483    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1484    '''
1485
1486    length = len(obs)
1487
1488    max_samples = np.max([o.N for o in obs])
1489    if max_samples <= length and not any(o.cov_names for o in obs):
1490        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank-deficient matrix.", RuntimeWarning)
1491
1492    cov = np.zeros((length, length))
1493    for i in range(length):
1494        for j in range(i, length):
1495            cov[i, j] = _covariance_element(obs[i], obs[j])
1496    cov = cov + cov.T - np.diag(np.diag(cov))
1497
1498    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1499
1500    if isinstance(smooth, int):
1501        corr = _smooth_eigenvalues(corr, smooth)
1502
1503    if visualize:
1504        plt.matshow(corr, vmin=-1, vmax=1)
1505        plt.set_cmap('RdBu')
1506        plt.colorbar()
1507        plt.draw()
1508
1509    if correlation is True:
1510        return corr
1511
1512    errors = [o.dvalue for o in obs]
1513    cov = np.diag(errors) @ corr @ np.diag(errors)
1514
1515    eigenvalues = np.linalg.eigh(cov)[0]
1516    if not np.all(eigenvalues >= 0):
1517        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1518
1519    return cov
1520
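# Usage sketch: the gamma method has to be applied to every Obs before the
# covariance can be estimated (obs1, obs2, obs3 are assumed Obs instances).
#
#     for o in [obs1, obs2, obs3]:
#         o.gamma_method()
#     cov = covariance([obs1, obs2, obs3])
#     corr = covariance([obs1, obs2, obs3], correlation=True)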
1521
1522def _smooth_eigenvalues(corr, E):
1523    """Eigenvalue smoothing as described in hep-lat/9412087
1524
1525    corr : np.ndarray
1526        correlation matrix
1527    E : integer
1528        Number of eigenvalues to be left substantially unchanged
1529    """
1530    if not (2 < E < corr.shape[0] - 1):
1531        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1532    vals, vec = np.linalg.eigh(corr)
1533    lambda_min = np.mean(vals[:-E])
1534    vals[vals < lambda_min] = lambda_min
1535    vals /= np.mean(vals)
1536    return vec @ np.diag(vals) @ vec.T
1537
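# Sketch of the effect for a hypothetical 6x6 correlation matrix and E=3:
# lambda_min is the mean of the three smallest eigenvalues, every eigenvalue
# below lambda_min is raised to lambda_min and the spectrum is rescaled so
# that its mean is 1 again.
#
#     corr_smooth = _smooth_eigenvalues(corr, 3)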
1538
1539def _covariance_element(obs1, obs2):
1540    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1541
1542    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1543        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1544        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1545        return np.sum(deltas1 * deltas2)
1546
1547    if set(obs1.names).isdisjoint(set(obs2.names)):
1548        return 0.0
1549
1550    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1551        raise Exception('The gamma method has to be applied to both Obs first.')
1552
1553    dvalue = 0.0
1554
1555    for e_name in obs1.mc_names:
1556
1557        if e_name not in obs2.mc_names:
1558            continue
1559
1560        idl_d = {}
1561        for r_name in obs1.e_content[e_name]:
1562            if r_name not in obs2.e_content[e_name]:
1563                continue
1564            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1565
1566        gamma = 0.0
1567
1568        for r_name in obs1.e_content[e_name]:
1569            if r_name not in obs2.e_content[e_name]:
1570                continue
1571            if len(idl_d[r_name]) == 0:
1572                continue
1573            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1574
1575        if gamma == 0.0:
1576            continue
1577
1578        gamma_div = 0.0
1579        for r_name in obs1.e_content[e_name]:
1580            if r_name not in obs2.e_content[e_name]:
1581                continue
1582            if len(idl_d[r_name]) == 0:
1583                continue
1584            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1585        gamma /= gamma_div
1586
1587        dvalue += gamma
1588
1589    for e_name in obs1.cov_names:
1590
1591        if e_name not in obs2.cov_names:
1592            continue
1593
1594        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1595
1596    return dvalue
1597
1598
1599def import_jackknife(jacks, name, idl=None):
1600    """Imports jackknife samples and returns an Obs
1601
1602    Parameters
1603    ----------
1604    jacks : numpy.ndarray
1605        numpy array containing the mean value as zeroth entry and
1606        the N jackknife samples as first to Nth entry.
1607    name : str
1608        name of the ensemble the samples are defined on.
1609    """
1610    length = len(jacks) - 1
1611    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1612    samples = jacks[1:] @ prj
1613    mean = np.mean(samples)
1614    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1615    new_obs._value = jacks[0]
1616    return new_obs
1617
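# Round-trip sketch, assuming my_obs is an Obs defined on a single replicum
# (the optional idl argument carries the configuration indices, like in the
# Obs constructor):
#
#     name = my_obs.names[0]
#     jacks = my_obs.export_jackknife()
#     reconstructed = import_jackknife(jacks, name, idl=[my_obs.idl[name]])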
1618
1619def import_bootstrap(boots, name, random_numbers):
1620    """Imports bootstrap samples and returns an Obs
1621
1622    Parameters
1623    ----------
1624    boots : numpy.ndarray
1625        numpy array containing the mean value as zeroth entry and
1626        the N bootstrap samples as first to Nth entry.
1627    name : str
1628        name of the ensemble the samples are defined on.
1629    random_numbers : np.ndarray
1630        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1631        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1632        chain to be reconstructed.
1633    """
1634    samples, length = random_numbers.shape
1635    if samples != len(boots) - 1:
1636        raise ValueError("Random numbers do not have the correct shape.")
1637
1638    if samples < length:
1639        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1640
1641    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1642
1643    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1644    ret = Obs([samples], [name])
1645    ret._value = boots[0]
1646    return ret
1647
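# Round-trip sketch, assuming my_obs is an Obs defined on a single replicum;
# the same random numbers have to be used for export and import, and at least
# as many bootstrap samples as Monte Carlo data points are required:
#
#     rng = np.random.default_rng(42)
#     rn = rng.integers(0, my_obs.N, size=(2 * my_obs.N, my_obs.N))
#     boots = my_obs.export_bootstrap(2 * my_obs.N, random_numbers=rn)
#     reconstructed = import_bootstrap(boots, my_obs.names[0], random_numbers=rn)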
1648
1649def merge_obs(list_of_obs):
1650    """Combine all observables in list_of_obs into one new observable
1651
1652    Parameters
1653    ----------
1654    list_of_obs : list
1655        list of the Obs object to be combined
1656
1657    Notes
1658    -----
1659    It is not possible to combine obs which are based on the same replicum
1660    """
1661    replist = [item for obs in list_of_obs for item in obs.names]
1662    if len(replist) != len(set(replist)):
1663        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1664    if any([len(o.cov_names) for o in list_of_obs]):
1665        raise Exception('Not possible to merge data that contains covobs!')
1666    new_dict = {}
1667    idl_dict = {}
1668    for o in list_of_obs:
1669        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1670                        for key in set(o.deltas) | set(o.r_values)})
1671        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1672
1673    names = sorted(new_dict.keys())
1674    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1675    o.reweighted = any(oi.reweighted for oi in list_of_obs)
1676    return o
1677
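# Usage sketch, assuming obs_r1 and obs_r2 are Obs measured on disjoint
# replica of the same ensemble (e.g. names 'ens|r1' and 'ens|r2'):
#
#     full = merge_obs([obs_r1, obs_r2])
#     full.gamma_method()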
1678
1679def cov_Obs(means, cov, name, grad=None):
1680    """Create an Obs based on mean(s) and a covariance matrix
1681
1682    Parameters
1683    ----------
1684    means : list of floats or float
1685        N mean value(s) of the new Obs
1686    cov : list or array
1687        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1688    name : str
1689        identifier for the covariance matrix
1690    grad : list or array
1691        Gradient of the Covobs wrt. the means belonging to cov.
1692    """
1693
1694    def covobs_to_obs(co):
1695        """Make an Obs out of a Covobs
1696
1697        Parameters
1698        ----------
1699        co : Covobs
1700            Covobs to be embedded into the Obs
1701        """
1702        o = Obs([], [], means=[])
1703        o._value = co.value
1704        o.names.append(co.name)
1705        o._covobs[co.name] = co
1706        o._dvalue = np.sqrt(co.errsq())
1707        return o
1708
1709    ol = []
1710    if isinstance(means, (float, int)):
1711        means = [means]
1712
1713    for i in range(len(means)):
1714        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1715    if ol[0].covobs[name].N != len(means):
1716        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1717    if len(ol) == 1:
1718        return ol[0]
1719    return ol
1720
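# Usage sketch with made-up numbers and names: a single external quantity with
# a Gaussian error (the 0d covariance is the variance, i.e. the squared error)
# and a pair of correlated quantities:
#
#     single = cov_Obs(0.7963, 0.0006 ** 2, 'renormalization')
#     pair = cov_Obs([1.0, 2.0], [[0.10, 0.05], [0.05, 0.20]], 'systematics')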
1721
1722def _determine_gap(o, e_content, e_name):
1723    gaps = []
1724    for r_name in e_content[e_name]:
1725        if isinstance(o.idl[r_name], range):
1726            gaps.append(o.idl[r_name].step)
1727        else:
1728            gaps.append(np.min(np.diff(o.idl[r_name])))
1729
1730    gap = min(gaps)
1731    if not np.all([gi % gap == 0 for gi in gaps]):
1732        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1733
1734    return gap
1735
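# Example, assuming my_obs is an Obs with a single replicum named 'ens'
# (hypothetical name) whose idl is range(1, 100, 2): the gap is the step of
# the range; for an explicit list the minimal spacing between consecutive
# configs is used and all replica gaps have to be multiples of the smallest one.
#
#     _determine_gap(my_obs, my_obs.e_content, 'ens')   # -> 2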
1736
1737def _check_lists_equal(idl):
1738    '''
1739    Use groupby to efficiently check whether all elements of idl are identical.
1740    Returns True if all elements are equal, otherwise False.
1741
1742    Parameters
1743    ----------
1744    idl : list of lists, ranges or np.ndarrays
1745    '''
1746    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1747    if next(g, True) and not next(g, False):
1748        return True
1749    return False
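
# Worked examples:
#
#     _check_lists_equal([[1, 2, 3], [1, 2, 3]])   # -> True
#     _check_lists_equal([[1, 2, 3], [1, 2, 4]])   # -> False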
825                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
826
827    def __rsub__(self, y):
828        return -1 * (self - y)
829
830    def __pos__(self):
831        return self
832
833    def __neg__(self):
834        return -1 * self
835
836    def __truediv__(self, y):
837        if isinstance(y, Obs):
838            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
839        else:
840            if isinstance(y, np.ndarray):
841                return np.array([self / o for o in y])
842            elif y.__class__.__name__ in ['Corr', 'CObs']:
843                return NotImplemented
844            else:
845                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
846
847    def __rtruediv__(self, y):
848        if isinstance(y, Obs):
849            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
850        else:
851            if isinstance(y, np.ndarray):
852                return np.array([o / self for o in y])
853            elif y.__class__.__name__ in ['Corr', 'CObs']:
854                return NotImplemented
855            else:
856                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
857
858    def __pow__(self, y):
859        if isinstance(y, Obs):
860            return derived_observable(lambda x: x[0] ** x[1], [self, y])
861        else:
862            return derived_observable(lambda x: x[0] ** y, [self])
863
864    def __rpow__(self, y):
865        if isinstance(y, Obs):
866            return derived_observable(lambda x: x[0] ** x[1], [y, self])
867        else:
868            return derived_observable(lambda x: y ** x[0], [self])
869
870    def __abs__(self):
871        return derived_observable(lambda x: anp.abs(x[0]), [self])
872
873    # Overload numpy functions
874    def sqrt(self):
875        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
876
877    def log(self):
878        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
879
880    def exp(self):
881        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
882
883    def sin(self):
884        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
885
886    def cos(self):
887        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
888
889    def tan(self):
890        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
891
892    def arcsin(self):
893        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
894
895    def arccos(self):
896        return derived_observable(lambda x: anp.arccos(x[0]), [self])
897
898    def arctan(self):
899        return derived_observable(lambda x: anp.arctan(x[0]), [self])
900
901    def sinh(self):
902        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
903
904    def cosh(self):
905        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
906
907    def tanh(self):
908        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
909
910    def arcsinh(self):
911        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
912
913    def arccosh(self):
914        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
915
916    def arctanh(self):
917        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
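The class-level dictionaries above can be used to set per-ensemble defaults that gamma_method picks up automatically. A minimal sketch, assuming a hypothetical ensemble name 'ensemble_A':

    import pyerrors as pe

    # Override the global defaults for one ensemble only ('ensemble_A' is a placeholder)
    pe.Obs.S_dict['ensemble_A'] = 3.0         # used instead of S_global for this ensemble
    pe.Obs.tau_exp_dict['ensemble_A'] = 10.0  # triggers the tau_exp analysis for this ensemble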
Obs(samples, names, idl=None, **kwargs)
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86            else:
 87                if not isinstance(names[0], str):
 88                    raise TypeError('All names have to be strings.')
 89            if min(len(x) for x in samples) <= 4:
 90                raise ValueError('Samples have to have at least 5 entries.')
 91
 92        self.names = sorted(names)
 93        self.shape = {}
 94        self.r_values = {}
 95        self.deltas = {}
 96        self._covobs = {}
 97
 98        self._value = 0
 99        self.N = 0
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
109                    elif np.any(dc == 0):
110                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
111                    if len(dc) == 1:
112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
113                    else:
114                        self.idl[name] = list(idx)
115                else:
116                    raise TypeError('incompatible type for idl[%s].' % (name))
117        else:
118            for name, sample in sorted(zip(names, samples)):
119                self.idl[name] = range(1, len(sample) + 1)
120
121        if kwargs.get("means") is not None:
122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                self.r_values[name] = mean
126                self.deltas[name] = sample
127        else:
128            for name, sample in sorted(zip(names, samples)):
129                self.shape[name] = len(self.idl[name])
130                self.N += self.shape[name]
131                if len(sample) != self.shape[name]:
132                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
133                self.r_values[name] = np.mean(sample)
134                self.deltas[name] = sample - self.r_values[name]
135                self._value += self.shape[name] * self.r_values[name]
136            self._value /= self.N
137
138        self._dvalue = 0.0
139        self.ddvalue = 0.0
140        self.reweighted = False
141
142        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
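For illustration, a minimal construction sketch with synthetic data (the ensemble name and sample values are placeholders, not part of the library):

    import numpy as np
    import pyerrors as pe

    # 1000 synthetic measurements for a single ensemble/replicum
    rng = np.random.default_rng(1)
    my_obs = pe.Obs([rng.normal(1.0, 0.1, 1000)], ['ensemble_A'])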
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
names
shape
r_values
deltas
N
idl
ddvalue
reweighted
tag
value
dvalue
e_names
cov_names
mc_names
e_content
covobs
def gamma_method(self, **kwargs):
177    def gamma_method(self, **kwargs):
178        """Estimate the error and related properties of the Obs.
179
180        Parameters
181        ----------
182        S : float
183            specifies a custom value for the parameter S (default 2.0).
184            If set to 0 it is assumed that the data exhibits no
185            autocorrelation. In this case the error estimate coincides
186            with the sample standard error.
187        tau_exp : float
188            positive value triggers the critical slowing down analysis
189            (default 0.0).
190        N_sigma : float
191            number of standard deviations from zero until the tail is
192            attached to the autocorrelation function (default 1).
193        fft : bool
194            determines whether the fft algorithm is used for the computation
195            of the autocorrelation function (default True)
196        """
197
198        e_content = self.e_content
199        self.e_dvalue = {}
200        self.e_ddvalue = {}
201        self.e_tauint = {}
202        self.e_dtauint = {}
203        self.e_windowsize = {}
204        self.e_n_tauint = {}
205        self.e_n_dtauint = {}
206        e_gamma = {}
207        self.e_rho = {}
208        self.e_drho = {}
209        self._dvalue = 0
210        self.ddvalue = 0
211
212        self.S = {}
213        self.tau_exp = {}
214        self.N_sigma = {}
215
216        if kwargs.get('fft') is False:
217            fft = False
218        else:
219            fft = True
220
221        def _parse_kwarg(kwarg_name):
222            if kwarg_name in kwargs:
223                tmp = kwargs.get(kwarg_name)
224                if isinstance(tmp, (int, float)):
225                    if tmp < 0:
226                        raise Exception(kwarg_name + ' has to be larger than or equal to 0.')
227                    for e, e_name in enumerate(self.e_names):
228                        getattr(self, kwarg_name)[e_name] = tmp
229                else:
230                    raise TypeError(kwarg_name + ' is not in proper format.')
231            else:
232                for e, e_name in enumerate(self.e_names):
233                    if e_name in getattr(Obs, kwarg_name + '_dict'):
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
235                    else:
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
237
238        _parse_kwarg('S')
239        _parse_kwarg('tau_exp')
240        _parse_kwarg('N_sigma')
241
242        for e, e_name in enumerate(self.mc_names):
243            gapsize = _determine_gap(self, e_content, e_name)
244
245            r_length = []
246            for r_name in e_content[e_name]:
247                if isinstance(self.idl[r_name], range):
248                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
249                else:
250                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
251
252            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
253            w_max = max(r_length) // 2
254            e_gamma[e_name] = np.zeros(w_max)
255            self.e_rho[e_name] = np.zeros(w_max)
256            self.e_drho[e_name] = np.zeros(w_max)
257
258            for r_name in e_content[e_name]:
259                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
260
261            gamma_div = np.zeros(w_max)
262            for r_name in e_content[e_name]:
263                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
264            gamma_div[gamma_div < 1] = 1.0
265            e_gamma[e_name] /= gamma_div[:w_max]
266
267            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
268                self.e_tauint[e_name] = 0.5
269                self.e_dtauint[e_name] = 0.0
270                self.e_dvalue[e_name] = 0.0
271                self.e_ddvalue[e_name] = 0.0
272                self.e_windowsize[e_name] = 0
273                continue
274
275            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
276            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
277            # Make sure no entry of tauint is smaller than 0.5
278            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
279            # hep-lat/0306017 eq. (42)
280            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
281            self.e_n_dtauint[e_name][0] = 0.0
282
283            def _compute_drho(i):
284                tmp = (self.e_rho[e_name][i + 1:w_max]
285                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
286                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
287                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
288                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
289
290            if self.tau_exp[e_name] > 0:
291                _compute_drho(1)
292                texp = self.tau_exp[e_name]
293                # Critical slowing down analysis
294                if w_max // 2 <= 1:
295                    raise Exception("Need at least 8 samples for tau_exp error analysis")
296                for n in range(1, w_max // 2):
297                    _compute_drho(n + 1)
298                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
299                        # Bias correction hep-lat/0306017 eq. (49) included
300                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
301                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
302                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
303                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
304                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
305                        self.e_windowsize[e_name] = n
306                        break
307            else:
308                if self.S[e_name] == 0.0:
309                    self.e_tauint[e_name] = 0.5
310                    self.e_dtauint[e_name] = 0.0
311                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
312                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
313                    self.e_windowsize[e_name] = 0
314                else:
315                    # Standard automatic windowing procedure
316                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
317                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
318                    for n in range(1, w_max):
319                        if g_w[n - 1] < 0 or n >= w_max - 1:
320                            _compute_drho(n)
321                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
322                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
323                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
324                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
325                            self.e_windowsize[e_name] = n
326                            break
327
328            self._dvalue += self.e_dvalue[e_name] ** 2
329            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
330
331        for e_name in self.cov_names:
332            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
333            self.e_ddvalue[e_name] = 0
334            self._dvalue += self.e_dvalue[e_name]**2
335
336        self._dvalue = np.sqrt(self._dvalue)
337        if self._dvalue == 0.0:
338            self.ddvalue = 0.0
339        else:
340            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
341        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
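Typical calls, continuing the hypothetical my_obs from the construction sketch above:

    my_obs.gamma_method()            # automatic windowing with the default S=2.0
    my_obs.gamma_method(S=3.0)       # more conservative automatic windowing
    my_obs.gamma_method(tau_exp=10)  # critical slowing down analysis with attached tail
    print(my_obs)                    # value with the estimated error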
def gm(self, **kwargs):
Alias for gamma_method; source and parameter description are identical to gamma_method above.
def details(self, ens_content=True):
381    def details(self, ens_content=True):
382        """Output detailed properties of the Obs.
383
384        Parameters
385        ----------
386        ens_content : bool
387            print details about the ensembles and replica if true.
388        """
389        if self.tag is not None:
390            print("Description:", self.tag)
391        if not hasattr(self, 'e_dvalue'):
392            print('Result\t %3.8e' % (self.value))
393        else:
394            if self.value == 0.0:
395                percentage = np.nan
396            else:
397                percentage = np.abs(self._dvalue / self.value) * 100
398            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
399            if len(self.e_names) > 1:
400                print(' Ensemble errors:')
401            e_content = self.e_content
402            for e_name in self.mc_names:
403                gap = _determine_gap(self, e_content, e_name)
404
405                if len(self.e_names) > 1:
406                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
407                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
408                tau_string += f" in units of {gap} config"
409                if gap > 1:
410                    tau_string += "s"
411                if self.tau_exp[e_name] > 0:
412                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
413                else:
414                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
415                print(tau_string)
416            for e_name in self.cov_names:
417                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
418        if ens_content is True:
419            if len(self.e_names) == 1:
420                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
421            else:
422                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
423            my_string_list = []
424            for key, value in sorted(self.e_content.items()):
425                if key not in self.covobs:
426                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
427                    if len(value) == 1:
428                        my_string += f': {self.shape[value[0]]} configurations'
429                        if isinstance(self.idl[value[0]], range):
430                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
431                        else:
432                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
433                    else:
434                        sublist = []
435                        for v in value:
436                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
437                            my_substring += f': {self.shape[v]} configurations'
438                            if isinstance(self.idl[v], range):
439                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
440                            else:
441                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
442                            sublist.append(my_substring)
443
444                        my_string += '\n' + '\n'.join(sublist)
445                else:
446                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
447                my_string_list.append(my_string)
448            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
450    def reweight(self, weight):
451        """Reweight the obs with given reweighting factors.
452
453        Parameters
454        ----------
455        weight : Obs
456            Reweighting factor. An Observable that has to be defined on a superset of the
457            configurations in obs[i].idl for all i.
458        all_configs : bool
459            if True, the reweighted observables are normalized by the average of
460            the reweighting factor on all configurations in weight.idl and not
461            on the configurations in obs[i].idl. Default False.
462        """
463        return reweight(weight, [self])[0]

Reweight the obs with given reweighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
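A usage sketch, where weight is an Obs built from hypothetical reweighting factors rw_factors defined on the same configurations as my_obs:

    # rw_factors: placeholder array with one reweighting factor per configuration
    weight = pe.Obs([rw_factors], ['ensemble_A'])
    r_obs = my_obs.reweight(weight)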
def is_zero_within_error(self, sigma=1):
465    def is_zero_within_error(self, sigma=1):
466        """Checks whether the observable is zero within 'sigma' standard errors.
467
468        Parameters
469        ----------
470        sigma : int
471            Number of standard errors used for the check.
472
473        Only works properly after the gamma method has been run.
474        """
475        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
Only works properly after the gamma method has been run.
def is_zero(self, atol=1e-10):
477    def is_zero(self, atol=1e-10):
478        """Checks whether the observable is zero within a given tolerance.
479
480        Parameters
481        ----------
482        atol : float
483            Absolute tolerance (for details see numpy documentation).
484        """
485        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
487    def plot_tauint(self, save=None):
488        """Plot integrated autocorrelation time for each ensemble.
489
490        Parameters
491        ----------
492        save : str
493            saves the figure to a file named 'save' if a path is specified.
494        """
495        if not hasattr(self, 'e_dvalue'):
496            raise Exception('Run the gamma method first.')
497
498        for e, e_name in enumerate(self.mc_names):
499            fig = plt.figure()
500            plt.xlabel(r'$W$')
501            plt.ylabel(r'$\tau_\mathrm{int}$')
502            length = int(len(self.e_n_tauint[e_name]))
503            if self.tau_exp[e_name] > 0:
504                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
505                x_help = np.arange(2 * self.tau_exp[e_name])
506                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
507                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
508                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
509                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
510                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
511                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
512                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
513            else:
514                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
515                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
516
517            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
518            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
519            plt.legend()
520            plt.xlim(-0.5, xmax)
521            ylim = plt.ylim()
522            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
523            plt.draw()
524            if save:
525                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if a path is specified.
def plot_rho(self, save=None):
527    def plot_rho(self, save=None):
528        """Plot normalized autocorrelation function for each ensemble.
529
530        Parameters
531        ----------
532        save : str
533            saves the figure to a file named 'save' if a path is specified.
534        """
535        if not hasattr(self, 'e_dvalue'):
536            raise Exception('Run the gamma method first.')
537        for e, e_name in enumerate(self.mc_names):
538            fig = plt.figure()
539            plt.xlabel('W')
540            plt.ylabel('rho')
541            length = int(len(self.e_drho[e_name]))
542            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
543            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
544            if self.tau_exp[e_name] > 0:
545                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
546                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
547                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
548                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
549            else:
550                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
551                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
552            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
553            plt.xlim(-0.5, xmax)
554            plt.draw()
555            if save:
556                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if a path is specified.
def plot_rep_dist(self):
558    def plot_rep_dist(self):
559        """Plot replica distribution for each ensemble with more than one replicum."""
560        if not hasattr(self, 'e_dvalue'):
561            raise Exception('Run the gamma method first.')
562        for e, e_name in enumerate(self.mc_names):
563            if len(self.e_content[e_name]) == 1:
564                print('No replica distribution for a single replicum (', e_name, ')')
565                continue
566            r_length = []
567            sub_r_mean = 0
568            for r, r_name in enumerate(self.e_content[e_name]):
569                r_length.append(len(self.deltas[r_name]))
570                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
571            e_N = np.sum(r_length)
572            sub_r_mean /= e_N
573            arr = np.zeros(len(self.e_content[e_name]))
574            for r, r_name in enumerate(self.e_content[e_name]):
575                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
576            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
577            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
578            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
580    def plot_history(self, expand=True):
581        """Plot derived Monte Carlo history for each ensemble
582
583        Parameters
584        ----------
585        expand : bool
586            show expanded history for irregular Monte Carlo chains (default: True).
587        """
588        for e, e_name in enumerate(self.mc_names):
589            plt.figure()
590            r_length = []
591            tmp = []
592            tmp_expanded = []
593            for r, r_name in enumerate(self.e_content[e_name]):
594                tmp.append(self.deltas[r_name] + self.r_values[r_name])
595                if expand:
596                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
597                    r_length.append(len(tmp_expanded[-1]))
598                else:
599                    r_length.append(len(tmp[-1]))
600            e_N = np.sum(r_length)
601            x = np.arange(e_N)
602            y_test = np.concatenate(tmp, axis=0)
603            if expand:
604                y = np.concatenate(tmp_expanded, axis=0)
605            else:
606                y = y_test
607            plt.errorbar(x, y, fmt='.', markersize=3)
608            plt.xlim(-0.5, e_N - 0.5)
609            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
610            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
612    def plot_piechart(self, save=None):
613        """Plot piechart which shows the fractional contribution of each
614        ensemble to the error and returns a dictionary containing the fractions.
615
616        Parameters
617        ----------
618        save : str
619            saves the figure to a file named 'save' if a path is specified.
620        """
621        if not hasattr(self, 'e_dvalue'):
622            raise Exception('Run the gamma method first.')
623        if np.isclose(0.0, self._dvalue, atol=1e-15):
624            raise Exception('Error is 0.0')
625        labels = self.e_names
626        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
627        fig1, ax1 = plt.subplots()
628        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
629        ax1.axis('equal')
630        plt.draw()
631        if save:
632            fig1.savefig(save)
633
634        return dict(zip(labels, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if a path is specified.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
636    def dump(self, filename, datatype="json.gz", description="", **kwargs):
637        """Dump the Obs to a file 'name' of chosen format.
638
639        Parameters
640        ----------
641        filename : str
642            name of the file to be saved.
643        datatype : str
644            Format of the exported file. Supported formats include
645            "json.gz" and "pickle"
646        description : str
647            Description for output file, only relevant for json.gz format.
648        path : str
649            specifies a custom path for the file (default '.')
650        """
651        if 'path' in kwargs:
652            file_name = kwargs.get('path') + '/' + filename
653        else:
654            file_name = filename
655
656        if datatype == "json.gz":
657            from .input.json import dump_to_json
658            dump_to_json([self], file_name, description=description)
659        elif datatype == "pickle":
660            with open(file_name + '.p', 'wb') as fb:
661                pickle.dump(self, fb)
662        else:
663            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
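A usage sketch (the pickle branch appends '.p'; the json.gz writer is assumed to append its own suffix):

    my_obs.dump("my_obs")                      # writes my_obs.json.gz
    my_obs.dump("my_obs", datatype="pickle")   # writes my_obs.p
    my_obs.dump("my_obs", path="./results")    # custom output directory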
def export_jackknife(self):
665    def export_jackknife(self):
666        """Export jackknife samples from the Obs
667
668        Returns
669        -------
670        numpy.ndarray
671            Returns a numpy array of length N + 1 where N is the number of samples
672            for the given ensemble and replicum. The zeroth entry of the array contains
673            the mean value of the Obs, entries 1 to N contain the N jackknife samples
674            derived from the Obs. The current implementation only works for observables
675            defined on exactly one ensemble and replicum. The derived jackknife samples
676            should agree with samples from a full jackknife analysis up to O(1/N).
677        """
678
679        if len(self.names) != 1:
680            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
681
682        name = self.names[0]
683        full_data = self.deltas[name] + self.r_values[name]
684        n = full_data.size
685        mean = self.value
686        tmp_jacks = np.zeros(n + 1)
687        tmp_jacks[0] = mean
688        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
689        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
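A sketch of how the exported samples combine with the textbook jackknife error formula (which, unlike the gamma method, ignores autocorrelations):

    jacks = my_obs.export_jackknife()
    n = len(jacks) - 1
    # jackknife variance: (n - 1)/n * sum of squared deviations of the samples
    err = np.sqrt((n - 1) * np.mean((jacks[1:] - np.mean(jacks[1:])) ** 2))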
def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
691    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
692        """Export bootstrap samples from the Obs
693
694        Parameters
695        ----------
696        samples : int
697            Number of bootstrap samples to generate.
698        random_numbers : np.ndarray
699            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
700            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
701        save_rng : str
702            Save the random numbers to a file if a path is specified.
703
704        Returns
705        -------
706        numpy.ndarray
707            Returns a numpy array of length samples + 1. The zeroth entry of
708            the array contains the mean value of the Obs, entries 1 to samples
709            contain the bootstrap samples derived from the Obs. The current
710            implementation only works for observables defined on exactly one
711            ensemble and replicum. The derived bootstrap samples should agree
712            with samples from a full bootstrap analysis up to O(1/N).
713        """
714        if len(self.names) != 1:
715            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")
716
717        name = self.names[0]
718        length = self.N
719
720        if random_numbers is None:
721            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
722            rng = np.random.default_rng(seed)
723            random_numbers = rng.integers(0, length, size=(samples, length))
724
725        if save_rng is not None:
726            np.savetxt(save_rng, random_numbers, fmt='%i')
727
728        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
729        ret = np.zeros(samples + 1)
730        ret[0] = self.value
731        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
732        return ret

Export bootstrap samples from the Obs

Parameters
  • samples (int): Number of bootstrap samples to generate.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
  • save_rng (str): Save the random numbers to a file if a path is specified.
Returns
  • numpy.ndarray: Returns a numpy array of length samples + 1. The zeroth entry of the array contains the mean value of the Obs, entries 1 to samples contain the bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
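A sketch of a plain bootstrap error estimate from the exported samples (again without autocorrelation handling); reusing one random_numbers array keeps the bootstrap samples of several observables correlated:

    boots = my_obs.export_bootstrap(samples=1000)
    central = boots[0]
    err = np.std(boots[1:], ddof=1)  # spread of the bootstrap replicas

    # Shared resampling for a second, hypothetical observable other_obs
    # (must have the same number of samples as my_obs)
    rn = np.random.default_rng(5).integers(0, my_obs.N, size=(1000, my_obs.N))
    boots_a = my_obs.export_bootstrap(samples=1000, random_numbers=rn)
    boots_b = other_obs.export_bootstrap(samples=1000, random_numbers=rn)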
def sqrt(self):
874    def sqrt(self):
875        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
877    def log(self):
878        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
880    def exp(self):
881        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
883    def sin(self):
884        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
886    def cos(self):
887        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
889    def tan(self):
890        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
892    def arcsin(self):
893        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
895    def arccos(self):
896        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
898    def arctan(self):
899        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
901    def sinh(self):
902        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
904    def cosh(self):
905        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
907    def tanh(self):
908        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
910    def arcsinh(self):
911        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
913    def arccosh(self):
914        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
916    def arctanh(self):
917        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
 920class CObs:
 921    """Class for a complex valued observable."""
 922    __slots__ = ['_real', '_imag', 'tag']
 923
 924    def __init__(self, real, imag=0.0):
 925        self._real = real
 926        self._imag = imag
 927        self.tag = None
 928
 929    @property
 930    def real(self):
 931        return self._real
 932
 933    @property
 934    def imag(self):
 935        return self._imag
 936
 937    def gamma_method(self, **kwargs):
 938        """Executes the gamma_method for the real and the imaginary part."""
 939        if isinstance(self.real, Obs):
 940            self.real.gamma_method(**kwargs)
 941        if isinstance(self.imag, Obs):
 942            self.imag.gamma_method(**kwargs)
 943
 944    def is_zero(self):
 945        """Checks whether both real and imaginary part are zero within machine precision."""
 946        return self.real == 0.0 and self.imag == 0.0
 947
 948    def conjugate(self):
 949        return CObs(self.real, -self.imag)
 950
 951    def __add__(self, other):
 952        if isinstance(other, np.ndarray):
 953            return other + self
 954        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 955            return CObs(self.real + other.real,
 956                        self.imag + other.imag)
 957        else:
 958            return CObs(self.real + other, self.imag)
 959
 960    def __radd__(self, y):
 961        return self + y
 962
 963    def __sub__(self, other):
 964        if isinstance(other, np.ndarray):
 965            return -1 * (other - self)
 966        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 967            return CObs(self.real - other.real, self.imag - other.imag)
 968        else:
 969            return CObs(self.real - other, self.imag)
 970
 971    def __rsub__(self, other):
 972        return -1 * (self - other)
 973
 974    def __mul__(self, other):
 975        if isinstance(other, np.ndarray):
 976            return other * self
 977        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 978            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 979                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 980                                               [self.real, other.real, self.imag, other.imag],
 981                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 982                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 983                                               [self.real, other.real, self.imag, other.imag],
 984                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 985            elif getattr(other, 'imag', 0) != 0:
 986                return CObs(self.real * other.real - self.imag * other.imag,
 987                            self.imag * other.real + self.real * other.imag)
 988            else:
 989                return CObs(self.real * other.real, self.imag * other.real)
 990        else:
 991            return CObs(self.real * other, self.imag * other)
 992
 993    def __rmul__(self, other):
 994        return self * other
 995
 996    def __truediv__(self, other):
 997        if isinstance(other, np.ndarray):
 998            return 1 / (other / self)
 999        elif hasattr(other, 'real') and hasattr(other, 'imag'):
1000            r = other.real ** 2 + other.imag ** 2
1001            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1002        else:
1003            return CObs(self.real / other, self.imag / other)
1004
1005    def __rtruediv__(self, other):
1006        r = self.real ** 2 + self.imag ** 2
1007        if hasattr(other, 'real') and hasattr(other, 'imag'):
1008            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1009        else:
1010            return CObs(self.real * other / r, -self.imag * other / r)
1011
1012    def __abs__(self):
1013        return np.sqrt(self.real**2 + self.imag**2)
1014
1015    def __pos__(self):
1016        return self
1017
1018    def __neg__(self):
1019        return -1 * self
1020
1021    def __eq__(self, other):
1022        return self.real == other.real and self.imag == other.imag
1023
1024    def __str__(self):
1025        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1026
1027    def __repr__(self):
1028        return 'CObs[' + str(self) + ']'
1029
1030    def __format__(self, format_type):
1031        if format_type == "":
1032            significance = 2
1033            format_type = "2"
1034        else:
1035            significance = int(float(format_type.replace("+", "").replace("-", "")))
1036        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0)
924    def __init__(self, real, imag=0.0):
925        self._real = real
926        self._imag = imag
927        self.tag = None
def gamma_method(self, **kwargs):
937    def gamma_method(self, **kwargs):
938        """Executes the gamma_method for the real and the imaginary part."""
939        if isinstance(self.real, Obs):
940            self.real.gamma_method(**kwargs)
941        if isinstance(self.imag, Obs):
942            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
944    def is_zero(self):
945        """Checks whether both real and imaginary part are zero within machine precision."""
946        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
948    def conjugate(self):
949        return CObs(self.real, -self.imag)
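
A minimal usage sketch (synthetic samples, hypothetical ensemble name 'ens'): two real Obs serve as real and imaginary part, and gamma_method propagates the error through both components.

import numpy as np
import pyerrors as pe

re = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens'])
im = pe.Obs([np.random.normal(0.5, 0.1, 500)], ['ens'])

z = pe.CObs(re, im)
w = z * z.conjugate()  # |z|^2; the imaginary part should vanish within errors
w.gamma_method()       # runs the gamma method on real and imaginary part
print(w.real, w.imag)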
def gamma_method(x, **kwargs):
1039def gamma_method(x, **kwargs):
1040    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1041
1042    See docstring of pe.Obs.gamma_method for details.
1043    """
1044    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs):
1039def gamma_method(x, **kwargs):
1040    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1041
1042    See docstring of pe.Obs.gamma_method for details.
1043    """
1044    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.
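
gm is an alias for gamma_method, which is why the identical source is shown for both. A short sketch (synthetic data, hypothetical ensemble name) applying it to a list of Obs:

import numpy as np
import pyerrors as pe

obs_list = [pe.Obs([np.random.normal(k, 0.1, 500)], ['ens']) for k in (1, 2, 3)]
pe.gm(obs_list)  # equivalent to pe.gamma_method(obs_list)
print([o.dvalue for o in obs_list])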

def derived_observable(func, data, array_mode=False, **kwargs):
1169def derived_observable(func, data, array_mode=False, **kwargs):
1170    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1171
1172    Parameters
1173    ----------
1174    func : object
1175        arbitrary function of the form func(data, **kwargs). For the
1176        automatic differentiation to work, all numpy functions have to have
1177        the autograd wrapper (use 'import autograd.numpy as anp').
1178    data : list
1179        list of Obs, e.g. [obs1, obs2, obs3].
1180    num_grad : bool
1181        if True, numerical derivatives are used instead of autograd
1182        (default False). To control the numerical differentiation the
1183        kwargs of numdifftools.step_generators.MaxStepGenerator
1184        can be used.
1185    man_grad : list
1186        manually supply a list or an array which contains the jacobian
1187        of func. Use cautiously, supplying the wrong derivative will
1188        not be intercepted.
1189
1190    Notes
1191    -----
1192    For simple mathematical operations it can be practical to use anonymous
1193    functions. For the ratio of two observables one can e.g. use
1194
1195    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1196    """
1197
1198    data = np.asarray(data)
1199    raveled_data = data.ravel()
1200
1201    # Workaround for matrix operations containing non Obs data
1202    if not all(isinstance(x, Obs) for x in raveled_data):
1203        for i in range(len(raveled_data)):
1204            if isinstance(raveled_data[i], (int, float)):
1205                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1206
1207    allcov = {}
1208    for o in raveled_data:
1209        for name in o.cov_names:
1210            if name in allcov:
1211                if not np.allclose(allcov[name], o.covobs[name].cov):
1212                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1213            else:
1214                allcov[name] = o.covobs[name].cov
1215
1216    n_obs = len(raveled_data)
1217    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1218    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1219    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1220
1221    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1222
1223    if data.ndim == 1:
1224        values = np.array([o.value for o in data])
1225    else:
1226        values = np.vectorize(lambda x: x.value)(data)
1227
1228    new_values = func(values, **kwargs)
1229
1230    multi = int(isinstance(new_values, np.ndarray))
1231
1232    new_r_values = {}
1233    new_idl_d = {}
1234    for name in new_sample_names:
1235        idl = []
1236        tmp_values = np.zeros(n_obs)
1237        for i, item in enumerate(raveled_data):
1238            tmp_values[i] = item.r_values.get(name, item.value)
1239            tmp_idl = item.idl.get(name)
1240            if tmp_idl is not None:
1241                idl.append(tmp_idl)
1242        if multi > 0:
1243            tmp_values = np.array(tmp_values).reshape(data.shape)
1244        new_r_values[name] = func(tmp_values, **kwargs)
1245        new_idl_d[name] = _merge_idx(idl)
1246
1247    if 'man_grad' in kwargs:
1248        deriv = np.asarray(kwargs.get('man_grad'))
1249        if new_values.shape + data.shape != deriv.shape:
1250            raise Exception('Manual derivative does not have correct shape.')
1251    elif kwargs.get('num_grad') is True:
1252        if multi > 0:
1253            raise Exception('Multi mode currently not supported for numerical derivative')
1254        options = {
1255            'base_step': 0.1,
1256            'step_ratio': 2.5}
1257        for key in options.keys():
1258            kwarg = kwargs.get(key)
1259            if kwarg is not None:
1260                options[key] = kwarg
1261        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1262        if tmp_df.size == 1:
1263            deriv = np.array([tmp_df.real])
1264        else:
1265            deriv = tmp_df.real
1266    else:
1267        deriv = jacobian(func)(values, **kwargs)
1268
1269    final_result = np.zeros(new_values.shape, dtype=object)
1270
1271    if array_mode is True:
1272
1273        class _Zero_grad():
1274            def __init__(self, N):
1275                self.grad = np.zeros((N, 1))
1276
1277        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1278        d_extracted = {}
1279        g_extracted = {}
1280        for name in new_sample_names:
1281            d_extracted[name] = []
1282            ens_length = len(new_idl_d[name])
1283            for i_dat, dat in enumerate(data):
1284                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1285        for name in new_cov_names:
1286            g_extracted[name] = []
1287            zero_grad = _Zero_grad(new_covobs_lengths[name])
1288            for i_dat, dat in enumerate(data):
1289                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1290
1291    for i_val, new_val in np.ndenumerate(new_values):
1292        new_deltas = {}
1293        new_grad = {}
1294        if array_mode is True:
1295            for name in new_sample_names:
1296                ens_length = d_extracted[name][0].shape[-1]
1297                new_deltas[name] = np.zeros(ens_length)
1298                for i_dat, dat in enumerate(d_extracted[name]):
1299                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1300            for name in new_cov_names:
1301                new_grad[name] = 0
1302                for i_dat, dat in enumerate(g_extracted[name]):
1303                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1304        else:
1305            for j_obs, obs in np.ndenumerate(data):
1306                for name in obs.names:
1307                    if name in obs.cov_names:
1308                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1309                    else:
1310                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1311
1312        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1313
1314        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1315            raise Exception('The same name has been used for deltas and covobs!')
1316        new_samples = []
1317        new_means = []
1318        new_idl = []
1319        new_names_obs = []
1320        for name in new_names:
1321            if name not in new_covobs:
1322                new_samples.append(new_deltas[name])
1323                new_idl.append(new_idl_d[name])
1324                new_means.append(new_r_values[name][i_val])
1325                new_names_obs.append(name)
1326        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1327        for name in new_covobs:
1328            final_result[i_val].names.append(name)
1329        final_result[i_val]._covobs = new_covobs
1330        final_result[i_val]._value = new_val
1331        final_result[i_val].reweighted = reweighted
1332
1333    if multi == 0:
1334        final_result = final_result.item()
1335
1336    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
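
Two further hedged sketches (synthetic inputs): in the default mode the function has to use autograd-wrapped numpy (anp), and with num_grad=True the function must accept **kwargs, because derived_observable passes its keyword arguments through to func.

import numpy as np
import autograd.numpy as anp
import pyerrors as pe

o1 = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens'])
o2 = pe.Obs([np.random.normal(2.0, 0.1, 500)], ['ens'])

# Automatic differentiation: use anp inside the function.
log_ratio = pe.derived_observable(lambda x: anp.log(x[0] / x[1]), [o1, o2])

# Numerical differentiation: the function has to swallow the extra kwargs.
ratio_num = pe.derived_observable(lambda x, **kwargs: x[0] / x[1], [o1, o2], num_grad=True)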

def reweight(weight, obs, **kwargs):
1368def reweight(weight, obs, **kwargs):
1369    """Reweight a list of observables.
1370
1371    Parameters
1372    ----------
1373    weight : Obs
1374        Reweighting factor. An Observable that has to be defined on a superset of the
1375        configurations in obs[i].idl for all i.
1376    obs : list
1377        list of Obs, e.g. [obs1, obs2, obs3].
1378    all_configs : bool
1379        if True, the reweighted observables are normalized by the average of
1380        the reweighting factor on all configurations in weight.idl and not
1381        on the configurations in obs[i].idl. Default False.
1382    """
1383    result = []
1384    for i in range(len(obs)):
1385        if len(obs[i].cov_names):
1386            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1387        if not set(obs[i].names).issubset(weight.names):
1388            raise Exception('Error: Ensembles do not fit')
1389        for name in obs[i].names:
1390            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1391                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1392        new_samples = []
1393        w_deltas = {}
1394        for name in sorted(obs[i].names):
1395            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1396            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1397        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1398
1399        if kwargs.get('all_configs'):
1400            new_weight = weight
1401        else:
1402            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1403
1404        result.append(tmp_obs / new_weight)
1405        result[-1].reweighted = True
1406
1407    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
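
A minimal sketch (synthetic reweighting factor, hypothetical ensemble name 'ens'); reweight returns a list with one entry per input Obs and marks the results as reweighted:

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens'])
weight = pe.Obs([np.random.normal(1.0, 0.01, 500)], ['ens'])  # synthetic reweighting factor

r_obs = pe.reweight(weight, [obs])[0]
r_obs.gamma_method()
print(r_obs.reweighted)  # True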
def correlate(obs_a, obs_b):
1410def correlate(obs_a, obs_b):
1411    """Correlate two observables.
1412
1413    Parameters
1414    ----------
1415    obs_a : Obs
1416        First observable
1417    obs_b : Obs
1418        Second observable
1419
1420    Notes
1421    -----
1422    Keep in mind to only correlate primary observables which have not been reweighted
1423    yet. The reweighting has to be applied after correlating the observables.
1424    Currently only works if ensembles are identical (this is not strictly necessary).
1425    """
1426
1427    if sorted(obs_a.names) != sorted(obs_b.names):
1428        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1429    if len(obs_a.cov_names) or len(obs_b.cov_names):
1430        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1431    for name in obs_a.names:
1432        if obs_a.shape[name] != obs_b.shape[name]:
1433            raise Exception('Shapes of ensemble', name, 'do not fit')
1434        if obs_a.idl[name] != obs_b.idl[name]:
1435            raise Exception('idl of ensemble', name, 'do not fit')
1436
1437    if obs_a.reweighted is True:
1438        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1439    if obs_b.reweighted is True:
1440        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1441
1442    new_samples = []
1443    new_idl = []
1444    for name in sorted(obs_a.names):
1445        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1446        new_idl.append(obs_a.idl[name])
1447
1448    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1449    o.reweighted = obs_a.reweighted or obs_b.reweighted
1450    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
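
A minimal sketch with two synthetic primary observables defined on the same configurations (the ensemble name is made up):

import numpy as np
import pyerrors as pe

x = np.random.normal(1.0, 0.1, 500)
obs_a = pe.Obs([x], ['ens'])
obs_b = pe.Obs([x + np.random.normal(0.0, 0.05, 500)], ['ens'])

ab = pe.correlate(obs_a, obs_b)  # samples of a*b, correlations preserved
ab.gamma_method()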

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1453def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1454    r'''Calculates the error covariance matrix of a set of observables.
1455
1456    WARNING: This function should be used with care, especially for observables with support on multiple
1457             ensembles with differing autocorrelations. See the notes below for details.
1458
1459    The gamma method has to be applied first to all observables.
1460
1461    Parameters
1462    ----------
1463    obs : list or numpy.ndarray
1464        List or one dimensional array of Obs
1465    visualize : bool
1466        If True plots the corresponding normalized correlation matrix (default False).
1467    correlation : bool
1468        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1469    smooth : None or int
1470        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1471        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1472        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1473        small ones.
1474
1475    Notes
1476    -----
1477    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1478    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1479    in the absence of autocorrelation.
1480    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1481    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1482    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1483    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1484    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1485    '''
1486
1487    length = len(obs)
1488
1489    max_samples = np.max([o.N for o in obs])
1490    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1491        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1492
1493    cov = np.zeros((length, length))
1494    for i in range(length):
1495        for j in range(i, length):
1496            cov[i, j] = _covariance_element(obs[i], obs[j])
1497    cov = cov + cov.T - np.diag(np.diag(cov))
1498
1499    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1500
1501    if isinstance(smooth, int):
1502        corr = _smooth_eigenvalues(corr, smooth)
1503
1504    if visualize:
1505        plt.matshow(corr, vmin=-1, vmax=1)
1506        plt.set_cmap('RdBu')
1507        plt.colorbar()
1508        plt.draw()
1509
1510    if correlation is True:
1511        return corr
1512
1513    errors = [o.dvalue for o in obs]
1514    cov = np.diag(errors) @ corr @ np.diag(errors)
1515
1516    eigenvalues = np.linalg.eigh(cov)[0]
1517    if not np.all(eigenvalues >= 0):
1518        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1519
1520    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.

The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors, including the previous gamma method estimate for the autocorrelation of the observables. The covariance at window size 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.

For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements:
$$\tau_{\mathrm{int},ij}=\sqrt{\tau_{\mathrm{int},i}\times\tau_{\mathrm{int},j}}$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
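
A minimal sketch with two correlated synthetic observables (the names are made up). Note that gamma_method has to be run on every input before covariance is called:

import numpy as np
import pyerrors as pe

x = np.random.normal(1.0, 0.1, 500)
o1 = pe.Obs([x], ['ens'])
o2 = pe.Obs([x + np.random.normal(0.0, 0.05, 500)], ['ens'])
for o in (o1, o2):
    o.gamma_method()

cov = pe.covariance([o1, o2])                     # 2x2 error covariance matrix
corr = pe.covariance([o1, o2], correlation=True)  # normalized correlation matrix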

def import_jackknife(jacks, name, idl=None):
1600def import_jackknife(jacks, name, idl=None):
1601    """Imports jackknife samples and returns an Obs
1602
1603    Parameters
1604    ----------
1605    jacks : numpy.ndarray
1606        numpy array containing the mean value as zeroth entry and
1607        the N jackknife samples as first to Nth entry.
1608    name : str
1609        name of the ensemble the samples are defined on.
1610    """
1611    length = len(jacks) - 1
1612    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1613    samples = jacks[1:] @ prj
1614    mean = np.mean(samples)
1615    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1616    new_obs._value = jacks[0]
1617    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
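
A hedged round-trip sketch using the companion method Obs.export_jackknife, which produces an array of exactly the layout described above (synthetic data, hypothetical ensemble name):

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens'])
obs.gamma_method()

jacks = obs.export_jackknife()           # mean value plus N jackknife samples
rec = pe.import_jackknife(jacks, 'ens')  # reconstructs the original Obs
rec.gamma_method()
print(obs.value - rec.value)             # should vanish up to rounding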
def import_bootstrap(boots, name, random_numbers):
1620def import_bootstrap(boots, name, random_numbers):
1621    """Imports bootstrap samples and returns an Obs
1622
1623    Parameters
1624    ----------
1625    boots : numpy.ndarray
1626        numpy array containing the mean value as zeroth entry and
1627        the N bootstrap samples as first to Nth entry.
1628    name : str
1629        name of the ensemble the samples are defined on.
1630    random_numbers : np.ndarray
1631        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1632        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1633        chain to be reconstructed.
1634    """
1635    samples, length = random_numbers.shape
1636    if samples != len(boots) - 1:
1637        raise ValueError("Random numbers do not have the correct shape.")
1638
1639    if samples < length:
1640        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1641
1642    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1643
1644    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1645    ret = Obs([samples], [name])
1646    ret._value = boots[0]
1647    return ret

Imports bootstrap samples and returns an Obs

Parameters
  • boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
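
A hedged round-trip sketch, assuming the companion method Obs.export_bootstrap whose return layout is described earlier in this document. The same random_numbers array has to be used for export and import, and there must be at least as many bootstrap samples as Monte Carlo data points (ensemble name and sample values are made up):

import numpy as np
import pyerrors as pe

length = 100
obs = pe.Obs([np.random.normal(1.0, 0.1, length)], ['ens'])
obs.gamma_method()

# Integer indices in [0, length) defining the resampling, shape (samples, length).
rng = np.random.default_rng(42)
random_numbers = rng.integers(0, length, size=(500, length))

boots = obs.export_bootstrap(500, random_numbers=random_numbers)
rec = pe.import_bootstrap(boots, 'ens', random_numbers)  # reconstructed Obs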
def merge_obs(list_of_obs):
1650def merge_obs(list_of_obs):
1651    """Combine all observables in list_of_obs into one new observable
1652
1653    Parameters
1654    ----------
1655    list_of_obs : list
1656        list of the Obs object to be combined
1657
1658    Notes
1659    -----
1660    It is not possible to combine obs which are based on the same replicum
1661    """
1662    replist = [item for obs in list_of_obs for item in obs.names]
1663    if (len(replist) == len(set(replist))) is False:
1664        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1665    if any([len(o.cov_names) for o in list_of_obs]):
1666        raise Exception('Not possible to merge data that contains covobs!')
1667    new_dict = {}
1668    idl_dict = {}
1669    for o in list_of_obs:
1670        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1671                        for key in set(o.deltas) | set(o.r_values)})
1672        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1673
1674    names = sorted(new_dict.keys())
1675    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1676    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1677    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs objects to be combined
Notes

It is not possible to combine obs which are based on the same replicum
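
A minimal sketch combining two synthetic replica of the same ensemble (the names follow the usual 'ensemble|replicum' convention):

import numpy as np
import pyerrors as pe

obs_r1 = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens|r1'])
obs_r2 = pe.Obs([np.random.normal(1.0, 0.1, 400)], ['ens|r2'])

merged = pe.merge_obs([obs_r1, obs_r2])  # one Obs defined on both replica
merged.gamma_method()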

def cov_Obs(means, cov, name, grad=None):
1680def cov_Obs(means, cov, name, grad=None):
1681    """Create an Obs based on mean(s) and a covariance matrix
1682
1683    Parameters
1684    ----------
1685    mean : list of floats or float
1686        N mean value(s) of the new Obs
1687    cov : list or array
1688        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1689    name : str
1690        identifier for the covariance matrix
1691    grad : list or array
1692        Gradient of the Covobs wrt. the means belonging to cov.
1693    """
1694
1695    def covobs_to_obs(co):
1696        """Make an Obs out of a Covobs
1697
1698        Parameters
1699        ----------
1700        co : Covobs
1701            Covobs to be embedded into the Obs
1702        """
1703        o = Obs([], [], means=[])
1704        o._value = co.value
1705        o.names.append(co.name)
1706        o._covobs[co.name] = co
1707        o._dvalue = np.sqrt(co.errsq())
1708        return o
1709
1710    ol = []
1711    if isinstance(means, (float, int)):
1712        means = [means]
1713
1714    for i in range(len(means)):
1715        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1716    if ol[0].covobs[name].N != len(means):
1717        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1718    if len(ol) == 1:
1719        return ol[0]
1720    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
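
Two minimal sketches (all numbers made up): a single external input with a plain error, and a pair of correlated inputs with a 2x2 covariance matrix. Note that cov expects (co)variances, not standard errors:

import pyerrors as pe

# Scalar case: the 0d "covariance" is the variance, i.e. the squared error.
zp = pe.cov_Obs(0.78, 0.02 ** 2, 'Zp')

# Correlated case: two means and a 2x2 covariance matrix, returns a list of Obs.
cov = [[0.0004, 0.0001],
       [0.0001, 0.0009]]
a, b = pe.cov_Obs([1.0, 2.0], cov, 'external_input')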