pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85                if len(set([o.split('|')[0] for o in names])) > 1:
  86                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
  87            else:
  88                if not isinstance(names[0], str):
  89                    raise TypeError('All names have to be strings.')
  90            if min(len(x) for x in samples) <= 4:
  91                raise ValueError('Samples have to have at least 5 entries.')
  92
  93        self.names = sorted(names)
  94        self.shape = {}
  95        self.r_values = {}
  96        self.deltas = {}
  97        self._covobs = {}
  98
  99        self._value = 0
 100        self.N = 0
 101        self.idl = {}
 102        if idl is not None:
 103            for name, idx in sorted(zip(names, idl)):
 104                if isinstance(idx, range):
 105                    self.idl[name] = idx
 106                elif isinstance(idx, (list, np.ndarray)):
 107                    dc = np.unique(np.diff(idx))
 108                    if np.any(dc < 0):
 109                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
 110                    elif np.any(dc == 0):
 111                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
 112                    if len(dc) == 1:
 113                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 114                    else:
 115                        self.idl[name] = list(idx)
 116                else:
 117                    raise TypeError('incompatible type for idl[%s].' % (name))
 118        else:
 119            for name, sample in sorted(zip(names, samples)):
 120                self.idl[name] = range(1, len(sample) + 1)
 121
 122        if kwargs.get("means") is not None:
 123            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 124                self.shape[name] = len(self.idl[name])
 125                self.N += self.shape[name]
 126                self.r_values[name] = mean
 127                self.deltas[name] = sample
 128        else:
 129            for name, sample in sorted(zip(names, samples)):
 130                self.shape[name] = len(self.idl[name])
 131                self.N += self.shape[name]
 132                if len(sample) != self.shape[name]:
 133                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 134                self.r_values[name] = np.mean(sample)
 135                self.deltas[name] = sample - self.r_values[name]
 136                self._value += self.shape[name] * self.r_values[name]
 137            self._value /= self.N
 138
 139        self._dvalue = 0.0
 140        self.ddvalue = 0.0
 141        self.reweighted = False
 142
 143        self.tag = None
 144
 145    @property
 146    def value(self):
 147        return self._value
 148
 149    @property
 150    def dvalue(self):
 151        return self._dvalue
 152
 153    @property
 154    def e_names(self):
 155        return sorted(set([o.split('|')[0] for o in self.names]))
 156
 157    @property
 158    def cov_names(self):
 159        return sorted(set([o for o in self.covobs.keys()]))
 160
 161    @property
 162    def mc_names(self):
 163        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 164
 165    @property
 166    def e_content(self):
 167        res = {}
 168        for e, e_name in enumerate(self.e_names):
 169            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 170            if e_name in self.names:
 171                res[e_name].append(e_name)
 172        return res
 173
 174    @property
 175    def covobs(self):
 176        return self._covobs
 177
 178    def gamma_method(self, **kwargs):
 179        """Estimate the error and related properties of the Obs.
 180
 181        Parameters
 182        ----------
 183        S : float
 184            specifies a custom value for the parameter S (default 2.0).
 185            If set to 0 it is assumed that the data exhibits no
 186            autocorrelation. In this case the error estimates coincides
 187            with the sample standard error.
 188        tau_exp : float
 189            positive value triggers the critical slowing down analysis
 190            (default 0.0).
 191        N_sigma : float
 192            number of standard deviations from zero until the tail is
 193            attached to the autocorrelation function (default 1).
 194        fft : bool
 195            determines whether the fft algorithm is used for the computation
 196            of the autocorrelation function (default True)
 197        """
 198
 199        e_content = self.e_content
 200        self.e_dvalue = {}
 201        self.e_ddvalue = {}
 202        self.e_tauint = {}
 203        self.e_dtauint = {}
 204        self.e_windowsize = {}
 205        self.e_n_tauint = {}
 206        self.e_n_dtauint = {}
 207        e_gamma = {}
 208        self.e_rho = {}
 209        self.e_drho = {}
 210        self._dvalue = 0
 211        self.ddvalue = 0
 212
 213        self.S = {}
 214        self.tau_exp = {}
 215        self.N_sigma = {}
 216
 217        if kwargs.get('fft') is False:
 218            fft = False
 219        else:
 220            fft = True
 221
 222        def _parse_kwarg(kwarg_name):
 223            if kwarg_name in kwargs:
 224                tmp = kwargs.get(kwarg_name)
 225                if isinstance(tmp, (int, float)):
 226                    if tmp < 0:
 227                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
 228                    for e, e_name in enumerate(self.e_names):
 229                        getattr(self, kwarg_name)[e_name] = tmp
 230                else:
 231                    raise TypeError(kwarg_name + ' is not in proper format.')
 232            else:
 233                for e, e_name in enumerate(self.e_names):
 234                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 236                    else:
 237                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 238
 239        _parse_kwarg('S')
 240        _parse_kwarg('tau_exp')
 241        _parse_kwarg('N_sigma')
 242
 243        for e, e_name in enumerate(self.mc_names):
 244            gapsize = _determine_gap(self, e_content, e_name)
 245
 246            r_length = []
 247            for r_name in e_content[e_name]:
 248                if isinstance(self.idl[r_name], range):
 249                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 250                else:
 251                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 252
 253            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 254            w_max = max(r_length) // 2
 255            e_gamma[e_name] = np.zeros(w_max)
 256            self.e_rho[e_name] = np.zeros(w_max)
 257            self.e_drho[e_name] = np.zeros(w_max)
 258
 259            for r_name in e_content[e_name]:
 260                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 261
 262            gamma_div = np.zeros(w_max)
 263            for r_name in e_content[e_name]:
 264                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 265            gamma_div[gamma_div < 1] = 1.0
 266            e_gamma[e_name] /= gamma_div[:w_max]
 267
 268            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 269                self.e_tauint[e_name] = 0.5
 270                self.e_dtauint[e_name] = 0.0
 271                self.e_dvalue[e_name] = 0.0
 272                self.e_ddvalue[e_name] = 0.0
 273                self.e_windowsize[e_name] = 0
 274                continue
 275
 276            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 277            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 278            # Make sure no entry of tauint is smaller than 0.5
 279            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 280            # hep-lat/0306017 eq. (42)
 281            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 282            self.e_n_dtauint[e_name][0] = 0.0
 283
 284            def _compute_drho(i):
 285                tmp = (self.e_rho[e_name][i + 1:w_max]
 286                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 287                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 288                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 289                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 290
 291            if self.tau_exp[e_name] > 0:
 292                _compute_drho(1)
 293                texp = self.tau_exp[e_name]
 294                # Critical slowing down analysis
 295                if w_max // 2 <= 1:
 296                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
 297                for n in range(1, w_max // 2):
 298                    _compute_drho(n + 1)
 299                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 300                        # Bias correction hep-lat/0306017 eq. (49) included
 301                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 302                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 303                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 304                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 305                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 306                        self.e_windowsize[e_name] = n
 307                        break
 308            else:
 309                if self.S[e_name] == 0.0:
 310                    self.e_tauint[e_name] = 0.5
 311                    self.e_dtauint[e_name] = 0.0
 312                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 313                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 314                    self.e_windowsize[e_name] = 0
 315                else:
 316                    # Standard automatic windowing procedure
 317                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 318                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 319                    for n in range(1, w_max):
 320                        if g_w[n - 1] < 0 or n >= w_max - 1:
 321                            _compute_drho(n)
 322                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 323                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 324                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 325                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 326                            self.e_windowsize[e_name] = n
 327                            break
 328
 329            self._dvalue += self.e_dvalue[e_name] ** 2
 330            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 331
 332        for e_name in self.cov_names:
 333            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 334            self.e_ddvalue[e_name] = 0
 335            self._dvalue += self.e_dvalue[e_name]**2
 336
 337        self._dvalue = np.sqrt(self._dvalue)
 338        if self._dvalue == 0.0:
 339            self.ddvalue = 0.0
 340        else:
 341            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 342        return
 343
 344    gm = gamma_method
 345
 346    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 347        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 348           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 349
 350        Parameters
 351        ----------
 352        deltas : list
 353            List of fluctuations
 354        idx : list
 355            List or range of configurations on which the deltas are defined.
 356        shape : int
 357            Number of configurations in idx.
 358        w_max : int
 359            Upper bound for the summation window.
 360        fft : bool
 361            determines whether the fft algorithm is used for the computation
 362            of the autocorrelation function.
 363        gapsize : int
 364            The target distance between two configurations. If longer distances
 365            are found in idx, the data is expanded.
 366        """
 367        gamma = np.zeros(w_max)
 368        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 369        new_shape = len(deltas)
 370        if fft:
 371            max_gamma = min(new_shape, w_max)
 372            # The padding for the fft has to be even
 373            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 374            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 375        else:
 376            for n in range(w_max):
 377                if new_shape - n >= 0:
 378                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 379
 380        return gamma
 381
 382    def details(self, ens_content=True):
 383        """Output detailed properties of the Obs.
 384
 385        Parameters
 386        ----------
 387        ens_content : bool
 388            print details about the ensembles and replica if true.
 389        """
 390        if self.tag is not None:
 391            print("Description:", self.tag)
 392        if not hasattr(self, 'e_dvalue'):
 393            print('Result\t %3.8e' % (self.value))
 394        else:
 395            if self.value == 0.0:
 396                percentage = np.nan
 397            else:
 398                percentage = np.abs(self._dvalue / self.value) * 100
 399            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 400            if len(self.e_names) > 1:
 401                print(' Ensemble errors:')
 402            e_content = self.e_content
 403            for e_name in self.mc_names:
 404                gap = _determine_gap(self, e_content, e_name)
 405
 406                if len(self.e_names) > 1:
 407                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 408                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 409                tau_string += f" in units of {gap} config"
 410                if gap > 1:
 411                    tau_string += "s"
 412                if self.tau_exp[e_name] > 0:
 413                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 414                else:
 415                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 416                print(tau_string)
 417            for e_name in self.cov_names:
 418                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 419        if ens_content is True:
 420            if len(self.e_names) == 1:
 421                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 422            else:
 423                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 424            my_string_list = []
 425            for key, value in sorted(self.e_content.items()):
 426                if key not in self.covobs:
 427                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 428                    if len(value) == 1:
 429                        my_string += f': {self.shape[value[0]]} configurations'
 430                        if isinstance(self.idl[value[0]], range):
 431                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 432                        else:
 433                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 434                    else:
 435                        sublist = []
 436                        for v in value:
 437                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 438                            my_substring += f': {self.shape[v]} configurations'
 439                            if isinstance(self.idl[v], range):
 440                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 441                            else:
 442                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 443                            sublist.append(my_substring)
 444
 445                        my_string += '\n' + '\n'.join(sublist)
 446                else:
 447                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 448                my_string_list.append(my_string)
 449            print('\n'.join(my_string_list))
 450
 451    def reweight(self, weight):
 452        """Reweight the obs with given rewighting factors.
 453
 454        Parameters
 455        ----------
 456        weight : Obs
 457            Reweighting factor. An Observable that has to be defined on a superset of the
 458            configurations in obs[i].idl for all i.
 459        all_configs : bool
 460            if True, the reweighted observables are normalized by the average of
 461            the reweighting factor on all configurations in weight.idl and not
 462            on the configurations in obs[i].idl. Default False.
 463        """
 464        return reweight(weight, [self])[0]
 465
 466    def is_zero_within_error(self, sigma=1):
 467        """Checks whether the observable is zero within 'sigma' standard errors.
 468
 469        Parameters
 470        ----------
 471        sigma : int
 472            Number of standard errors used for the check.
 473
 474        Works only properly when the gamma method was run.
 475        """
 476        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 477
 478    def is_zero(self, atol=1e-10):
 479        """Checks whether the observable is zero within a given tolerance.
 480
 481        Parameters
 482        ----------
 483        atol : float
 484            Absolute tolerance (for details see numpy documentation).
 485        """
 486        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 487
 488    def plot_tauint(self, save=None):
 489        """Plot integrated autocorrelation time for each ensemble.
 490
 491        Parameters
 492        ----------
 493        save : str
 494            saves the figure to a file named 'save' if.
 495        """
 496        if not hasattr(self, 'e_dvalue'):
 497            raise Exception('Run the gamma method first.')
 498
 499        for e, e_name in enumerate(self.mc_names):
 500            fig = plt.figure()
 501            plt.xlabel(r'$W$')
 502            plt.ylabel(r'$\tau_\mathrm{int}$')
 503            length = int(len(self.e_n_tauint[e_name]))
 504            if self.tau_exp[e_name] > 0:
 505                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 506                x_help = np.arange(2 * self.tau_exp[e_name])
 507                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 508                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 509                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 510                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 511                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 512                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 513                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 514            else:
 515                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 516                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 517
 518            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 519            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 520            plt.legend()
 521            plt.xlim(-0.5, xmax)
 522            ylim = plt.ylim()
 523            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 524            plt.draw()
 525            if save:
 526                fig.savefig(save + "_" + str(e))
 527
 528    def plot_rho(self, save=None):
 529        """Plot normalized autocorrelation function time for each ensemble.
 530
 531        Parameters
 532        ----------
 533        save : str
 534            saves the figure to a file named 'save' if.
 535        """
 536        if not hasattr(self, 'e_dvalue'):
 537            raise Exception('Run the gamma method first.')
 538        for e, e_name in enumerate(self.mc_names):
 539            fig = plt.figure()
 540            plt.xlabel('W')
 541            plt.ylabel('rho')
 542            length = int(len(self.e_drho[e_name]))
 543            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 544            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 545            if self.tau_exp[e_name] > 0:
 546                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 547                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 548                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 549                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 550            else:
 551                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 552                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 553            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 554            plt.xlim(-0.5, xmax)
 555            plt.draw()
 556            if save:
 557                fig.savefig(save + "_" + str(e))
 558
 559    def plot_rep_dist(self):
 560        """Plot replica distribution for each ensemble with more than one replicum."""
 561        if not hasattr(self, 'e_dvalue'):
 562            raise Exception('Run the gamma method first.')
 563        for e, e_name in enumerate(self.mc_names):
 564            if len(self.e_content[e_name]) == 1:
 565                print('No replica distribution for a single replicum (', e_name, ')')
 566                continue
 567            r_length = []
 568            sub_r_mean = 0
 569            for r, r_name in enumerate(self.e_content[e_name]):
 570                r_length.append(len(self.deltas[r_name]))
 571                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 572            e_N = np.sum(r_length)
 573            sub_r_mean /= e_N
 574            arr = np.zeros(len(self.e_content[e_name]))
 575            for r, r_name in enumerate(self.e_content[e_name]):
 576                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 577            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 578            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 579            plt.draw()
 580
 581    def plot_history(self, expand=True):
 582        """Plot derived Monte Carlo history for each ensemble
 583
 584        Parameters
 585        ----------
 586        expand : bool
 587            show expanded history for irregular Monte Carlo chains (default: True).
 588        """
 589        for e, e_name in enumerate(self.mc_names):
 590            plt.figure()
 591            r_length = []
 592            tmp = []
 593            tmp_expanded = []
 594            for r, r_name in enumerate(self.e_content[e_name]):
 595                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 596                if expand:
 597                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 598                    r_length.append(len(tmp_expanded[-1]))
 599                else:
 600                    r_length.append(len(tmp[-1]))
 601            e_N = np.sum(r_length)
 602            x = np.arange(e_N)
 603            y_test = np.concatenate(tmp, axis=0)
 604            if expand:
 605                y = np.concatenate(tmp_expanded, axis=0)
 606            else:
 607                y = y_test
 608            plt.errorbar(x, y, fmt='.', markersize=3)
 609            plt.xlim(-0.5, e_N - 0.5)
 610            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 611            plt.draw()
 612
 613    def plot_piechart(self, save=None):
 614        """Plot piechart which shows the fractional contribution of each
 615        ensemble to the error and returns a dictionary containing the fractions.
 616
 617        Parameters
 618        ----------
 619        save : str
 620            saves the figure to a file named 'save' if.
 621        """
 622        if not hasattr(self, 'e_dvalue'):
 623            raise Exception('Run the gamma method first.')
 624        if np.isclose(0.0, self._dvalue, atol=1e-15):
 625            raise ValueError('Error is 0.0')
 626        labels = self.e_names
 627        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 628        fig1, ax1 = plt.subplots()
 629        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 630        ax1.axis('equal')
 631        plt.draw()
 632        if save:
 633            fig1.savefig(save)
 634
 635        return dict(zip(labels, sizes))
 636
 637    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 638        """Dump the Obs to a file 'name' of chosen format.
 639
 640        Parameters
 641        ----------
 642        filename : str
 643            name of the file to be saved.
 644        datatype : str
 645            Format of the exported file. Supported formats include
 646            "json.gz" and "pickle"
 647        description : str
 648            Description for output file, only relevant for json.gz format.
 649        path : str
 650            specifies a custom path for the file (default '.')
 651        """
 652        if 'path' in kwargs:
 653            file_name = kwargs.get('path') + '/' + filename
 654        else:
 655            file_name = filename
 656
 657        if datatype == "json.gz":
 658            from .input.json import dump_to_json
 659            dump_to_json([self], file_name, description=description)
 660        elif datatype == "pickle":
 661            with open(file_name + '.p', 'wb') as fb:
 662                pickle.dump(self, fb)
 663        else:
 664            raise TypeError("Unknown datatype " + str(datatype))
 665
 666    def export_jackknife(self):
 667        """Export jackknife samples from the Obs
 668
 669        Returns
 670        -------
 671        numpy.ndarray
 672            Returns a numpy array of length N + 1 where N is the number of samples
 673            for the given ensemble and replicum. The zeroth entry of the array contains
 674            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 675            derived from the Obs. The current implementation only works for observables
 676            defined on exactly one ensemble and replicum. The derived jackknife samples
 677            should agree with samples from a full jackknife analysis up to O(1/N).
 678        """
 679
 680        if len(self.names) != 1:
 681            raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 682
 683        name = self.names[0]
 684        full_data = self.deltas[name] + self.r_values[name]
 685        n = full_data.size
 686        mean = self.value
 687        tmp_jacks = np.zeros(n + 1)
 688        tmp_jacks[0] = mean
 689        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 690        return tmp_jacks
 691
 692    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 693        """Export bootstrap samples from the Obs
 694
 695        Parameters
 696        ----------
 697        samples : int
 698            Number of bootstrap samples to generate.
 699        random_numbers : np.ndarray
 700            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 701            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 702        save_rng : str
 703            Save the random numbers to a file if a path is specified.
 704
 705        Returns
 706        -------
 707        numpy.ndarray
 708            Returns a numpy array of length N + 1 where N is the number of samples
 709            for the given ensemble and replicum. The zeroth entry of the array contains
 710            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 711            derived from the Obs. The current implementation only works for observables
 712            defined on exactly one ensemble and replicum. The derived bootstrap samples
 713            should agree with samples from a full bootstrap analysis up to O(1/N).
 714        """
 715        if len(self.names) != 1:
 716            raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 717
 718        name = self.names[0]
 719        length = self.N
 720
 721        if random_numbers is None:
 722            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 723            rng = np.random.default_rng(seed)
 724            random_numbers = rng.integers(0, length, size=(samples, length))
 725
 726        if save_rng is not None:
 727            np.savetxt(save_rng, random_numbers, fmt='%i')
 728
 729        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 730        ret = np.zeros(samples + 1)
 731        ret[0] = self.value
 732        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 733        return ret
 734
 735    def __float__(self):
 736        return float(self.value)
 737
 738    def __repr__(self):
 739        return 'Obs[' + str(self) + ']'
 740
 741    def __str__(self):
 742        return _format_uncertainty(self.value, self._dvalue)
 743
 744    def __format__(self, format_type):
 745        if format_type == "":
 746            significance = 2
 747        else:
 748            significance = int(float(format_type.replace("+", "").replace("-", "")))
 749        my_str = _format_uncertainty(self.value, self._dvalue,
 750                                     significance=significance)
 751        for char in ["+", " "]:
 752            if format_type.startswith(char):
 753                if my_str[0] != "-":
 754                    my_str = char + my_str
 755        return my_str
 756
 757    def __hash__(self):
 758        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 759        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 760        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 761        hash_tuple += tuple([o.encode() for o in self.names])
 762        m = hashlib.md5()
 763        [m.update(o) for o in hash_tuple]
 764        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 765
 766    # Overload comparisons
 767    def __lt__(self, other):
 768        return self.value < other
 769
 770    def __le__(self, other):
 771        return self.value <= other
 772
 773    def __gt__(self, other):
 774        return self.value > other
 775
 776    def __ge__(self, other):
 777        return self.value >= other
 778
 779    def __eq__(self, other):
 780        if other is None:
 781            return False
 782        return (self - other).is_zero()
 783
 784    # Overload math operations
 785    def __add__(self, y):
 786        if isinstance(y, Obs):
 787            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 788        else:
 789            if isinstance(y, np.ndarray):
 790                return np.array([self + o for o in y])
 791            elif isinstance(y, complex):
 792                return CObs(self, 0) + y
 793            elif y.__class__.__name__ in ['Corr', 'CObs']:
 794                return NotImplemented
 795            else:
 796                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 797
 798    def __radd__(self, y):
 799        return self + y
 800
 801    def __mul__(self, y):
 802        if isinstance(y, Obs):
 803            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 804        else:
 805            if isinstance(y, np.ndarray):
 806                return np.array([self * o for o in y])
 807            elif isinstance(y, complex):
 808                return CObs(self * y.real, self * y.imag)
 809            elif y.__class__.__name__ in ['Corr', 'CObs']:
 810                return NotImplemented
 811            else:
 812                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 813
 814    def __rmul__(self, y):
 815        return self * y
 816
 817    def __sub__(self, y):
 818        if isinstance(y, Obs):
 819            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 820        else:
 821            if isinstance(y, np.ndarray):
 822                return np.array([self - o for o in y])
 823            elif y.__class__.__name__ in ['Corr', 'CObs']:
 824                return NotImplemented
 825            else:
 826                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 827
 828    def __rsub__(self, y):
 829        return -1 * (self - y)
 830
 831    def __pos__(self):
 832        return self
 833
 834    def __neg__(self):
 835        return -1 * self
 836
 837    def __truediv__(self, y):
 838        if isinstance(y, Obs):
 839            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 840        else:
 841            if isinstance(y, np.ndarray):
 842                return np.array([self / o for o in y])
 843            elif y.__class__.__name__ in ['Corr', 'CObs']:
 844                return NotImplemented
 845            else:
 846                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 847
 848    def __rtruediv__(self, y):
 849        if isinstance(y, Obs):
 850            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 851        else:
 852            if isinstance(y, np.ndarray):
 853                return np.array([o / self for o in y])
 854            elif y.__class__.__name__ in ['Corr', 'CObs']:
 855                return NotImplemented
 856            else:
 857                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 858
 859    def __pow__(self, y):
 860        if isinstance(y, Obs):
 861            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
 862        else:
 863            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])
 864
 865    def __rpow__(self, y):
 866        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])
 867
 868    def __abs__(self):
 869        return derived_observable(lambda x: anp.abs(x[0]), [self])
 870
 871    # Overload numpy functions
 872    def sqrt(self):
 873        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 874
 875    def log(self):
 876        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 877
 878    def exp(self):
 879        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 880
 881    def sin(self):
 882        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 883
 884    def cos(self):
 885        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 886
 887    def tan(self):
 888        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 889
 890    def arcsin(self):
 891        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 892
 893    def arccos(self):
 894        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 895
 896    def arctan(self):
 897        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 898
 899    def sinh(self):
 900        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 901
 902    def cosh(self):
 903        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 904
 905    def tanh(self):
 906        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 907
 908    def arcsinh(self):
 909        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 910
 911    def arccosh(self):
 912        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 913
 914    def arctanh(self):
 915        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 916
 917
 918class CObs:
 919    """Class for a complex valued observable."""
 920    __slots__ = ['_real', '_imag', 'tag']
 921
 922    def __init__(self, real, imag=0.0):
 923        self._real = real
 924        self._imag = imag
 925        self.tag = None
 926
 927    @property
 928    def real(self):
 929        return self._real
 930
 931    @property
 932    def imag(self):
 933        return self._imag
 934
 935    def gamma_method(self, **kwargs):
 936        """Executes the gamma_method for the real and the imaginary part."""
 937        if isinstance(self.real, Obs):
 938            self.real.gamma_method(**kwargs)
 939        if isinstance(self.imag, Obs):
 940            self.imag.gamma_method(**kwargs)
 941
 942    def is_zero(self):
 943        """Checks whether both real and imaginary part are zero within machine precision."""
 944        return self.real == 0.0 and self.imag == 0.0
 945
 946    def conjugate(self):
 947        return CObs(self.real, -self.imag)
 948
 949    def __add__(self, other):
 950        if isinstance(other, np.ndarray):
 951            return other + self
 952        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 953            return CObs(self.real + other.real,
 954                        self.imag + other.imag)
 955        else:
 956            return CObs(self.real + other, self.imag)
 957
 958    def __radd__(self, y):
 959        return self + y
 960
 961    def __sub__(self, other):
 962        if isinstance(other, np.ndarray):
 963            return -1 * (other - self)
 964        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 965            return CObs(self.real - other.real, self.imag - other.imag)
 966        else:
 967            return CObs(self.real - other, self.imag)
 968
 969    def __rsub__(self, other):
 970        return -1 * (self - other)
 971
 972    def __mul__(self, other):
 973        if isinstance(other, np.ndarray):
 974            return other * self
 975        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 976            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 977                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 978                                               [self.real, other.real, self.imag, other.imag],
 979                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 980                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 981                                               [self.real, other.real, self.imag, other.imag],
 982                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 983            elif getattr(other, 'imag', 0) != 0:
 984                return CObs(self.real * other.real - self.imag * other.imag,
 985                            self.imag * other.real + self.real * other.imag)
 986            else:
 987                return CObs(self.real * other.real, self.imag * other.real)
 988        else:
 989            return CObs(self.real * other, self.imag * other)
 990
 991    def __rmul__(self, other):
 992        return self * other
 993
 994    def __truediv__(self, other):
 995        if isinstance(other, np.ndarray):
 996            return 1 / (other / self)
 997        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 998            r = other.real ** 2 + other.imag ** 2
 999            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1000        else:
1001            return CObs(self.real / other, self.imag / other)
1002
1003    def __rtruediv__(self, other):
1004        r = self.real ** 2 + self.imag ** 2
1005        if hasattr(other, 'real') and hasattr(other, 'imag'):
1006            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1007        else:
1008            return CObs(self.real * other / r, -self.imag * other / r)
1009
1010    def __abs__(self):
1011        return np.sqrt(self.real**2 + self.imag**2)
1012
1013    def __pos__(self):
1014        return self
1015
1016    def __neg__(self):
1017        return -1 * self
1018
1019    def __eq__(self, other):
1020        return self.real == other.real and self.imag == other.imag
1021
1022    def __str__(self):
1023        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1024
1025    def __repr__(self):
1026        return 'CObs[' + str(self) + ']'
1027
1028    def __format__(self, format_type):
1029        if format_type == "":
1030            significance = 2
1031            format_type = "2"
1032        else:
1033            significance = int(float(format_type.replace("+", "").replace("-", "")))
1034        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1035
1036
1037def gamma_method(x, **kwargs):
1038    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1039
1040    See docstring of pe.Obs.gamma_method for details.
1041    """
1042    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1043
1044
1045gm = gamma_method
1046
1047
1048def _format_uncertainty(value, dvalue, significance=2):
1049    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1050    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1051        return str(value)
1052    if not isinstance(significance, int):
1053        raise TypeError("significance needs to be an integer.")
1054    if significance < 1:
1055        raise ValueError("significance needs to be larger than zero.")
1056    fexp = np.floor(np.log10(dvalue))
1057    if fexp < 0.0:
1058        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1059    elif fexp == 0.0:
1060        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1061    else:
1062        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1063
1064
1065def _expand_deltas(deltas, idx, shape, gapsize):
1066    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1067       configurations and where holes are filled by 0.
1068       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1069
1070    Parameters
1071    ----------
1072    deltas : list
1073        List of fluctuations
1074    idx : list
1075        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1076    shape : int
1077        Number of configs in idx.
1078    gapsize : int
1079        The target distance between two configurations. If longer distances
1080        are found in idx, the data is expanded.
1081    """
1082    if isinstance(idx, range):
1083        if (idx.step == gapsize):
1084            return deltas
1085    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1086    for i in range(shape):
1087        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1088    return ret
1089
1090
1091def _merge_idx(idl):
1092    """Returns the union of all lists in idl as range or sorted list
1093
1094    Parameters
1095    ----------
1096    idl : list
1097        List of lists or ranges.
1098    """
1099
1100    if _check_lists_equal(idl):
1101        return idl[0]
1102
1103    idunion = sorted(set().union(*idl))
1104
1105    # Check whether idunion can be expressed as range
1106    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1107    idtest = [list(idrange), idunion]
1108    if _check_lists_equal(idtest):
1109        return idrange
1110
1111    return idunion
1112
1113
1114def _intersection_idx(idl):
1115    """Returns the intersection of all lists in idl as range or sorted list
1116
1117    Parameters
1118    ----------
1119    idl : list
1120        List of lists or ranges.
1121    """
1122
1123    if _check_lists_equal(idl):
1124        return idl[0]
1125
1126    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1127
1128    # Check whether idinter can be expressed as range
1129    try:
1130        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1131        idtest = [list(idrange), idinter]
1132        if _check_lists_equal(idtest):
1133            return idrange
1134    except IndexError:
1135        pass
1136
1137    return idinter
1138
1139
1140def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor):
1141    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1142       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1143       common divisor of the step sizes is used as new step size.
1144
1145    Parameters
1146    ----------
1147    deltas : list
1148        List of fluctuations
1149    idx : list
1150        List or range of configs on which the deltas are defined.
1151        Has to be a subset of new_idx and has to be sorted in ascending order.
1152    shape : list
1153        Number of configs in idx.
1154    new_idx : list
1155        List of configs that defines the new range, has to be sorted in ascending order.
1156    scalefactor : float
1157        An additional scaling factor that can be applied to scale the fluctuations,
1158        e.g., when Obs with differing numbers of replica are merged.
1159    """
1160    if type(idx) is range and type(new_idx) is range:
1161        if idx == new_idx:
1162            if scalefactor == 1:
1163                return deltas
1164            else:
1165                return deltas * scalefactor
1166    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1167    for i in range(shape):
1168        ret[idx[i] - new_idx[0]] = deltas[i]
1169    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor
1170
1171
1172def derived_observable(func, data, array_mode=False, **kwargs):
1173    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1174
1175    Parameters
1176    ----------
1177    func : object
1178        arbitrary function of the form func(data, **kwargs). For the
1179        automatic differentiation to work, all numpy functions have to have
1180        the autograd wrapper (use 'import autograd.numpy as anp').
1181    data : list
1182        list of Obs, e.g. [obs1, obs2, obs3].
1183    num_grad : bool
1184        if True, numerical derivatives are used instead of autograd
1185        (default False). To control the numerical differentiation the
1186        kwargs of numdifftools.step_generators.MaxStepGenerator
1187        can be used.
1188    man_grad : list
1189        manually supply a list or an array which contains the jacobian
1190        of func. Use cautiously, supplying the wrong derivative will
1191        not be intercepted.
1192
1193    Notes
1194    -----
1195    For simple mathematical operations it can be practical to use anonymous
1196    functions. For the ratio of two observables one can e.g. use
1197
1198    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1199    """
1200
1201    data = np.asarray(data)
1202    raveled_data = data.ravel()
1203
1204    # Workaround for matrix operations containing non Obs data
1205    if not all(isinstance(x, Obs) for x in raveled_data):
1206        for i in range(len(raveled_data)):
1207            if isinstance(raveled_data[i], (int, float)):
1208                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1209
1210    allcov = {}
1211    for o in raveled_data:
1212        for name in o.cov_names:
1213            if name in allcov:
1214                if not np.allclose(allcov[name], o.covobs[name].cov):
1215                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1216            else:
1217                allcov[name] = o.covobs[name].cov
1218
1219    n_obs = len(raveled_data)
1220    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1221    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1222    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1223
1224    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1225
1226    if data.ndim == 1:
1227        values = np.array([o.value for o in data])
1228    else:
1229        values = np.vectorize(lambda x: x.value)(data)
1230
1231    new_values = func(values, **kwargs)
1232
1233    multi = int(isinstance(new_values, np.ndarray))
1234
1235    new_r_values = {}
1236    new_idl_d = {}
1237    for name in new_sample_names:
1238        idl = []
1239        tmp_values = np.zeros(n_obs)
1240        for i, item in enumerate(raveled_data):
1241            tmp_values[i] = item.r_values.get(name, item.value)
1242            tmp_idl = item.idl.get(name)
1243            if tmp_idl is not None:
1244                idl.append(tmp_idl)
1245        if multi > 0:
1246            tmp_values = np.array(tmp_values).reshape(data.shape)
1247        new_r_values[name] = func(tmp_values, **kwargs)
1248        new_idl_d[name] = _merge_idx(idl)
1249
1250    def _compute_scalefactor_missing_rep(obs):
1251        """
1252        Computes the scale factor that is to be multiplied with the deltas
1253        in the case where Obs with different subsets of replica are merged.
1254        Returns a dictionary with the scale factor for each Monte Carlo name.
1255
1256        Parameters
1257        ----------
1258        obs : Obs
1259            The observable corresponding to the deltas that are to be scaled
1260        """
1261        scalef_d = {}
1262        for mc_name in obs.mc_names:
1263            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1264            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1265            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1266                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1267        return scalef_d
1268
1269    if 'man_grad' in kwargs:
1270        deriv = np.asarray(kwargs.get('man_grad'))
1271        if new_values.shape + data.shape != deriv.shape:
1272            raise ValueError('Manual derivative does not have correct shape.')
1273    elif kwargs.get('num_grad') is True:
1274        if multi > 0:
1275            raise Exception('Multi mode currently not supported for numerical derivative')
1276        options = {
1277            'base_step': 0.1,
1278            'step_ratio': 2.5}
1279        for key in options.keys():
1280            kwarg = kwargs.get(key)
1281            if kwarg is not None:
1282                options[key] = kwarg
1283        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1284        if tmp_df.size == 1:
1285            deriv = np.array([tmp_df.real])
1286        else:
1287            deriv = tmp_df.real
1288    else:
1289        deriv = jacobian(func)(values, **kwargs)
1290
1291    final_result = np.zeros(new_values.shape, dtype=object)
1292
1293    if array_mode is True:
1294
1295        class _Zero_grad():
1296            def __init__(self, N):
1297                self.grad = np.zeros((N, 1))
1298
1299        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1300        d_extracted = {}
1301        g_extracted = {}
1302        for name in new_sample_names:
1303            d_extracted[name] = []
1304            ens_length = len(new_idl_d[name])
1305            for i_dat, dat in enumerate(data):
1306                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1307        for name in new_cov_names:
1308            g_extracted[name] = []
1309            zero_grad = _Zero_grad(new_covobs_lengths[name])
1310            for i_dat, dat in enumerate(data):
1311                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1312
1313    for i_val, new_val in np.ndenumerate(new_values):
1314        new_deltas = {}
1315        new_grad = {}
1316        if array_mode is True:
1317            for name in new_sample_names:
1318                ens_length = d_extracted[name][0].shape[-1]
1319                new_deltas[name] = np.zeros(ens_length)
1320                for i_dat, dat in enumerate(d_extracted[name]):
1321                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1322            for name in new_cov_names:
1323                new_grad[name] = 0
1324                for i_dat, dat in enumerate(g_extracted[name]):
1325                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1326        else:
1327            for j_obs, obs in np.ndenumerate(data):
1328                scalef_d = _compute_scalefactor_missing_rep(obs)
1329                for name in obs.names:
1330                    if name in obs.cov_names:
1331                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1332                    else:
1333                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1334
1335        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1336
1337        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1338            raise ValueError('The same name has been used for deltas and covobs!')
1339        new_samples = []
1340        new_means = []
1341        new_idl = []
1342        new_names_obs = []
1343        for name in new_names:
1344            if name not in new_covobs:
1345                new_samples.append(new_deltas[name])
1346                new_idl.append(new_idl_d[name])
1347                new_means.append(new_r_values[name][i_val])
1348                new_names_obs.append(name)
1349        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1350        for name in new_covobs:
1351            final_result[i_val].names.append(name)
1352        final_result[i_val]._covobs = new_covobs
1353        final_result[i_val]._value = new_val
1354        final_result[i_val].reweighted = reweighted
1355
1356    if multi == 0:
1357        final_result = final_result.item()
1358
1359    return final_result
1360
1361
1362def _reduce_deltas(deltas, idx_old, idx_new):
1363    """Extract deltas defined on idx_old on all configs of idx_new.
1364
1365    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1366    are ordered in an ascending order.
1367
1368    Parameters
1369    ----------
1370    deltas : list
1371        List of fluctuations
1372    idx_old : list
1373        List or range of configs on which the deltas are defined
1374    idx_new : list
1375        List of configs for which we want to extract the deltas.
1376        Has to be a subset of idx_old.
1377    """
1378    if not len(deltas) == len(idx_old):
1379        raise ValueError('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1380    if type(idx_old) is range and type(idx_new) is range:
1381        if idx_old == idx_new:
1382            return deltas
1383    if _check_lists_equal([idx_old, idx_new]):
1384        return deltas
1385    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1386    if len(indices) < len(idx_new):
1387        raise ValueError('Error in _reduce_deltas: Config of idx_new not in idx_old')
1388    return np.array(deltas)[indices]
1389
1390
1391def reweight(weight, obs, **kwargs):
1392    """Reweight a list of observables.
1393
1394    Parameters
1395    ----------
1396    weight : Obs
1397        Reweighting factor. An Observable that has to be defined on a superset of the
1398        configurations in obs[i].idl for all i.
1399    obs : list
1400        list of Obs, e.g. [obs1, obs2, obs3].
1401    all_configs : bool
1402        if True, the reweighted observables are normalized by the average of
1403        the reweighting factor on all configurations in weight.idl and not
1404        on the configurations in obs[i].idl. Default False.
1405    """
1406    result = []
1407    for i in range(len(obs)):
1408        if len(obs[i].cov_names):
1409            raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
1410        if not set(obs[i].names).issubset(weight.names):
1411            raise ValueError('Error: Ensembles do not fit')
1412        if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1413            raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
1414        for name in obs[i].names:
1415            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1416                raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1417        new_samples = []
1418        w_deltas = {}
1419        for name in sorted(obs[i].names):
1420            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1421            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1422        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1423
1424        if kwargs.get('all_configs'):
1425            new_weight = weight
1426        else:
1427            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1428
1429        result.append(tmp_obs / new_weight)
1430        result[-1].reweighted = True
1431
1432    return result
1433
1434
1435def correlate(obs_a, obs_b):
1436    """Correlate two observables.
1437
1438    Parameters
1439    ----------
1440    obs_a : Obs
1441        First observable
1442    obs_b : Obs
1443        Second observable
1444
1445    Notes
1446    -----
1447    Keep in mind to only correlate primary observables which have not been reweighted
1448    yet. The reweighting has to be applied after correlating the observables.
1449    Only works if a single ensemble is present in the Obs.
1450    Currently only works if ensemble content is identical (this is not strictly necessary).
1451    """
1452
1453    if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1454        raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
1455    if sorted(obs_a.names) != sorted(obs_b.names):
1456        raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1457    if len(obs_a.cov_names) or len(obs_b.cov_names):
1458        raise ValueError('Error: Not possible to correlate Obs that contain covobs!')
1459    for name in obs_a.names:
1460        if obs_a.shape[name] != obs_b.shape[name]:
1461            raise ValueError('Shapes of ensemble', name, 'do not fit')
1462        if obs_a.idl[name] != obs_b.idl[name]:
1463            raise ValueError('idl of ensemble', name, 'do not fit')
1464
1465    if obs_a.reweighted is True:
1466        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1467    if obs_b.reweighted is True:
1468        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1469
1470    new_samples = []
1471    new_idl = []
1472    for name in sorted(obs_a.names):
1473        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1474        new_idl.append(obs_a.idl[name])
1475
1476    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1477    o.reweighted = obs_a.reweighted or obs_b.reweighted
1478    return o
1479
1480
1481def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1482    r'''Calculates the error covariance matrix of a set of observables.
1483
1484    WARNING: This function should be used with care, especially for observables with support on multiple
1485             ensembles with differing autocorrelations. See the notes below for details.
1486
1487    The gamma method has to be applied first to all observables.
1488
1489    Parameters
1490    ----------
1491    obs : list or numpy.ndarray
1492        List or one dimensional array of Obs
1493    visualize : bool
1494        If True plots the corresponding normalized correlation matrix (default False).
1495    correlation : bool
1496        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1497    smooth : None or int
1498        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1499        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1500        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1501        small ones.
1502
1503    Notes
1504    -----
1505    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1506    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1507    in the absence of autocorrelation.
1508    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1509    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1510    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1511    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1512    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1513    '''
1514
1515    length = len(obs)
1516
1517    max_samples = np.max([o.N for o in obs])
1518    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1519        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1520
1521    cov = np.zeros((length, length))
1522    for i in range(length):
1523        for j in range(i, length):
1524            cov[i, j] = _covariance_element(obs[i], obs[j])
1525    cov = cov + cov.T - np.diag(np.diag(cov))
1526
1527    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1528
1529    if isinstance(smooth, int):
1530        corr = _smooth_eigenvalues(corr, smooth)
1531
1532    if visualize:
1533        plt.matshow(corr, vmin=-1, vmax=1)
1534        plt.set_cmap('RdBu')
1535        plt.colorbar()
1536        plt.draw()
1537
1538    if correlation is True:
1539        return corr
1540
1541    errors = [o.dvalue for o in obs]
1542    cov = np.diag(errors) @ corr @ np.diag(errors)
1543
1544    eigenvalues = np.linalg.eigh(cov)[0]
1545    if not np.all(eigenvalues >= 0):
1546        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1547
1548    return cov
1549
1550
1551def invert_corr_cov_cholesky(corr, inverrdiag):
1552    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1553       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1554
1555    Parameters
1556    ----------
1557    corr : np.ndarray
1558           correlation matrix
1559    inverrdiag : np.ndarray
1560              diagonal matrix, the entries are the inverse errors of the data points considered
1561    """
1562
1563    condn = np.linalg.cond(corr)
1564    if condn > 0.1 / np.finfo(float).eps:
1565        raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1566    if condn > 1e13:
1567        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
1568    chol = np.linalg.cholesky(corr)
1569    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1570
1571    return chol_inv
1572
1573
1574def sort_corr(corr, kl, yd):
1575    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1576
1577    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1578    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1579    that the correlation matrix is based on.
1580    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1581    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1582    to the y data `yd` when arranged in an alphabetical order by its keys.
1583
1584    Parameters
1585    ----------
1586    corr : np.ndarray
1587        A square correlation matrix constructed using the order of the y data specified by `kl`.
1588        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1589    kl : list of str
1590        A list of keys that denotes the order in which the y data from `yd` was used to build the
1591        input correlation matrix `corr`.
1592    yd : dict of list
1593        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1594        y data points. The total number of y data points across all keys must match the dimensions
1595        of `corr`. The lists in the dictionary can be lists of Obs.
1596
1597    Returns
1598    -------
1599    np.ndarray
1600        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1601
1602    Example
1603    -------
1604    >>> import numpy as np
1605    >>> import pyerrors as pe
1606    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1607    >>> kl = ['b', 'a']
1608    >>> yd = {'a': [1, 2], 'b': [3]}
1609    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1610    >>> print(sorted_corr)
1611    array([[1. , 0.3, 0.4],
1612           [0.3, 1. , 0.2],
1613           [0.4, 0.2, 1. ]])
1614
1615    """
1616    kl_sorted = sorted(kl)
1617
1618    posd = {}
1619    ofs = 0
1620    for ki, k in enumerate(kl):
1621        posd[k] = [i + ofs for i in range(len(yd[k]))]
1622        ofs += len(posd[k])
1623
1624    mapping = []
1625    for k in kl_sorted:
1626        for i in range(len(yd[k])):
1627            mapping.append(posd[k][i])
1628
1629    corr_sorted = np.zeros_like(corr)
1630    for i in range(corr.shape[0]):
1631        for j in range(corr.shape[0]):
1632            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1633
1634    return corr_sorted
1635
1636
1637def _smooth_eigenvalues(corr, E):
1638    """Eigenvalue smoothing as described in hep-lat/9412087
1639
1640    corr : np.ndarray
1641        correlation matrix
1642    E : integer
1643        Number of eigenvalues to be left substantially unchanged
1644    """
1645    if not (2 < E < corr.shape[0] - 1):
1646        raise ValueError(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1647    vals, vec = np.linalg.eigh(corr)
1648    lambda_min = np.mean(vals[:-E])
1649    vals[vals < lambda_min] = lambda_min
1650    vals /= np.mean(vals)
1651    return vec @ np.diag(vals) @ vec.T
1652
1653
1654def _covariance_element(obs1, obs2):
1655    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1656
1657    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1658        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1659        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1660        return np.sum(deltas1 * deltas2)
1661
1662    if set(obs1.names).isdisjoint(set(obs2.names)):
1663        return 0.0
1664
1665    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1666        raise Exception('The gamma method has to be applied to both Obs first.')
1667
1668    dvalue = 0.0
1669
1670    for e_name in obs1.mc_names:
1671
1672        if e_name not in obs2.mc_names:
1673            continue
1674
1675        idl_d = {}
1676        for r_name in obs1.e_content[e_name]:
1677            if r_name not in obs2.e_content[e_name]:
1678                continue
1679            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1680
1681        gamma = 0.0
1682
1683        for r_name in obs1.e_content[e_name]:
1684            if r_name not in obs2.e_content[e_name]:
1685                continue
1686            if len(idl_d[r_name]) == 0:
1687                continue
1688            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1689
1690        if gamma == 0.0:
1691            continue
1692
1693        gamma_div = 0.0
1694        for r_name in obs1.e_content[e_name]:
1695            if r_name not in obs2.e_content[e_name]:
1696                continue
1697            if len(idl_d[r_name]) == 0:
1698                continue
1699            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1700        gamma /= gamma_div
1701
1702        dvalue += gamma
1703
1704    for e_name in obs1.cov_names:
1705
1706        if e_name not in obs2.cov_names:
1707            continue
1708
1709        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1710
1711    return dvalue
1712
1713
1714def import_jackknife(jacks, name, idl=None):
1715    """Imports jackknife samples and returns an Obs
1716
1717    Parameters
1718    ----------
1719    jacks : numpy.ndarray
1720        numpy array containing the mean value as zeroth entry and
1721        the N jackknife samples as first to Nth entry.
1722    name : str
1723        name of the ensemble the samples are defined on.
1724    """
1725    length = len(jacks) - 1
1726    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1727    samples = jacks[1:] @ prj
1728    mean = np.mean(samples)
1729    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1730    new_obs._value = jacks[0]
1731    return new_obs
1732
1733
1734def import_bootstrap(boots, name, random_numbers):
1735    """Imports bootstrap samples and returns an Obs
1736
1737    Parameters
1738    ----------
1739    boots : numpy.ndarray
1740        numpy array containing the mean value as zeroth entry and
1741        the N bootstrap samples as first to Nth entry.
1742    name : str
1743        name of the ensemble the samples are defined on.
1744    random_numbers : np.ndarray
1745        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1746        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1747        chain to be reconstructed.
1748    """
1749    samples, length = random_numbers.shape
1750    if samples != len(boots) - 1:
1751        raise ValueError("Random numbers do not have the correct shape.")
1752
1753    if samples < length:
1754        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1755
1756    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1757
1758    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1759    ret = Obs([samples], [name])
1760    ret._value = boots[0]
1761    return ret
1762
1763
1764def merge_obs(list_of_obs):
1765    """Combine all observables in list_of_obs into one new observable.
1766    This allows to merge Obs that have been computed on multiple replica
1767    of the same ensemble.
1768    If you like to merge Obs that are based on several ensembles, please
1769    average them yourself.
1770
1771    Parameters
1772    ----------
1773    list_of_obs : list
1774        list of the Obs object to be combined
1775
1776    Notes
1777    -----
1778    It is not possible to combine obs which are based on the same replicum
1779    """
1780    replist = [item for obs in list_of_obs for item in obs.names]
1781    if (len(replist) == len(set(replist))) is False:
1782        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
1783    if any([len(o.cov_names) for o in list_of_obs]):
1784        raise ValueError('Not possible to merge data that contains covobs!')
1785    new_dict = {}
1786    idl_dict = {}
1787    for o in list_of_obs:
1788        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1789                        for key in set(o.deltas) | set(o.r_values)})
1790        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1791
1792    names = sorted(new_dict.keys())
1793    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1794    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1795    return o
1796
1797
1798def cov_Obs(means, cov, name, grad=None):
1799    """Create an Obs based on mean(s) and a covariance matrix
1800
1801    Parameters
1802    ----------
1803    mean : list of floats or float
1804        N mean value(s) of the new Obs
1805    cov : list or array
1806        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1807    name : str
1808        identifier for the covariance matrix
1809    grad : list or array
1810        Gradient of the Covobs wrt. the means belonging to cov.
1811    """
1812
1813    def covobs_to_obs(co):
1814        """Make an Obs out of a Covobs
1815
1816        Parameters
1817        ----------
1818        co : Covobs
1819            Covobs to be embedded into the Obs
1820        """
1821        o = Obs([], [], means=[])
1822        o._value = co.value
1823        o.names.append(co.name)
1824        o._covobs[co.name] = co
1825        o._dvalue = np.sqrt(co.errsq())
1826        return o
1827
1828    ol = []
1829    if isinstance(means, (float, int)):
1830        means = [means]
1831
1832    for i in range(len(means)):
1833        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1834    if ol[0].covobs[name].N != len(means):
1835        raise ValueError('You have to provide %d mean values!' % (ol[0].N))
1836    if len(ol) == 1:
1837        return ol[0]
1838    return ol
1839
1840
1841def _determine_gap(o, e_content, e_name):
1842    gaps = []
1843    for r_name in e_content[e_name]:
1844        if isinstance(o.idl[r_name], range):
1845            gaps.append(o.idl[r_name].step)
1846        else:
1847            gaps.append(np.min(np.diff(o.idl[r_name])))
1848
1849    gap = min(gaps)
1850    if not np.all([gi % gap == 0 for gi in gaps]):
1851        raise ValueError(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1852
1853    return gap
1854
1855
1856def _check_lists_equal(idl):
1857    '''
1858    Use groupby to efficiently check whether all elements of idl are identical.
1859    Returns True if all elements are equal, otherwise False.
1860
1861    Parameters
1862    ----------
1863    idl : list of lists, ranges or np.ndarrays
1864    '''
1865    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1866    if next(g, True) and not next(g, False):
1867        return True
1868    return False
class Obs:
 19class Obs:
 20    """Class for a general observable.
 21
 22    Instances of Obs are the basic objects of a pyerrors error analysis.
 23    They are initialized with a list which contains arrays of samples for
 24    different ensembles/replica and another list of same length which contains
 25    the names of the ensembles/replica. Mathematical operations can be
 26    performed on instances. The result is another instance of Obs. The error of
 27    an instance can be computed with the gamma_method. Also contains additional
 28    methods for output and visualization of the error calculation.
 29
 30    Attributes
 31    ----------
 32    S_global : float
 33        Standard value for S (default 2.0)
 34    S_dict : dict
 35        Dictionary for S values. If an entry for a given ensemble
 36        exists this overwrites the standard value for that ensemble.
 37    tau_exp_global : float
 38        Standard value for tau_exp (default 0.0)
 39    tau_exp_dict : dict
 40        Dictionary for tau_exp values. If an entry for a given ensemble exists
 41        this overwrites the standard value for that ensemble.
 42    N_sigma_global : float
 43        Standard value for N_sigma (default 1.0)
 44    N_sigma_dict : dict
 45        Dictionary for N_sigma values. If an entry for a given ensemble exists
 46        this overwrites the standard value for that ensemble.
 47    """
 48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 52                 'idl', 'tag', '_covobs', '__dict__']
 53
 54    S_global = 2.0
 55    S_dict = {}
 56    tau_exp_global = 0.0
 57    tau_exp_dict = {}
 58    N_sigma_global = 1.0
 59    N_sigma_dict = {}
 60
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86                if len(set([o.split('|')[0] for o in names])) > 1:
 87                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
 88            else:
 89                if not isinstance(names[0], str):
 90                    raise TypeError('All names have to be strings.')
 91            if min(len(x) for x in samples) <= 4:
 92                raise ValueError('Samples have to have at least 5 entries.')
 93
 94        self.names = sorted(names)
 95        self.shape = {}
 96        self.r_values = {}
 97        self.deltas = {}
 98        self._covobs = {}
 99
100        self._value = 0
101        self.N = 0
102        self.idl = {}
103        if idl is not None:
104            for name, idx in sorted(zip(names, idl)):
105                if isinstance(idx, range):
106                    self.idl[name] = idx
107                elif isinstance(idx, (list, np.ndarray)):
108                    dc = np.unique(np.diff(idx))
109                    if np.any(dc < 0):
110                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
111                    elif np.any(dc == 0):
112                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
113                    if len(dc) == 1:
114                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
115                    else:
116                        self.idl[name] = list(idx)
117                else:
118                    raise TypeError('incompatible type for idl[%s].' % (name))
119        else:
120            for name, sample in sorted(zip(names, samples)):
121                self.idl[name] = range(1, len(sample) + 1)
122
123        if kwargs.get("means") is not None:
124            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
125                self.shape[name] = len(self.idl[name])
126                self.N += self.shape[name]
127                self.r_values[name] = mean
128                self.deltas[name] = sample
129        else:
130            for name, sample in sorted(zip(names, samples)):
131                self.shape[name] = len(self.idl[name])
132                self.N += self.shape[name]
133                if len(sample) != self.shape[name]:
134                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
135                self.r_values[name] = np.mean(sample)
136                self.deltas[name] = sample - self.r_values[name]
137                self._value += self.shape[name] * self.r_values[name]
138            self._value /= self.N
139
140        self._dvalue = 0.0
141        self.ddvalue = 0.0
142        self.reweighted = False
143
144        self.tag = None
145
146    @property
147    def value(self):
148        return self._value
149
150    @property
151    def dvalue(self):
152        return self._dvalue
153
154    @property
155    def e_names(self):
156        return sorted(set([o.split('|')[0] for o in self.names]))
157
158    @property
159    def cov_names(self):
160        return sorted(set([o for o in self.covobs.keys()]))
161
162    @property
163    def mc_names(self):
164        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
165
166    @property
167    def e_content(self):
168        res = {}
169        for e, e_name in enumerate(self.e_names):
170            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
171            if e_name in self.names:
172                res[e_name].append(e_name)
173        return res
174
175    @property
176    def covobs(self):
177        return self._covobs
178
179    def gamma_method(self, **kwargs):
180        """Estimate the error and related properties of the Obs.
181
182        Parameters
183        ----------
184        S : float
185            specifies a custom value for the parameter S (default 2.0).
186            If set to 0 it is assumed that the data exhibits no
187            autocorrelation. In this case the error estimates coincides
188            with the sample standard error.
189        tau_exp : float
190            positive value triggers the critical slowing down analysis
191            (default 0.0).
192        N_sigma : float
193            number of standard deviations from zero until the tail is
194            attached to the autocorrelation function (default 1).
195        fft : bool
196            determines whether the fft algorithm is used for the computation
197            of the autocorrelation function (default True)
198        """
199
200        e_content = self.e_content
201        self.e_dvalue = {}
202        self.e_ddvalue = {}
203        self.e_tauint = {}
204        self.e_dtauint = {}
205        self.e_windowsize = {}
206        self.e_n_tauint = {}
207        self.e_n_dtauint = {}
208        e_gamma = {}
209        self.e_rho = {}
210        self.e_drho = {}
211        self._dvalue = 0
212        self.ddvalue = 0
213
214        self.S = {}
215        self.tau_exp = {}
216        self.N_sigma = {}
217
218        if kwargs.get('fft') is False:
219            fft = False
220        else:
221            fft = True
222
223        def _parse_kwarg(kwarg_name):
224            if kwarg_name in kwargs:
225                tmp = kwargs.get(kwarg_name)
226                if isinstance(tmp, (int, float)):
227                    if tmp < 0:
228                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
229                    for e, e_name in enumerate(self.e_names):
230                        getattr(self, kwarg_name)[e_name] = tmp
231                else:
232                    raise TypeError(kwarg_name + ' is not in proper format.')
233            else:
234                for e, e_name in enumerate(self.e_names):
235                    if e_name in getattr(Obs, kwarg_name + '_dict'):
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
237                    else:
238                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
239
240        _parse_kwarg('S')
241        _parse_kwarg('tau_exp')
242        _parse_kwarg('N_sigma')
243
244        for e, e_name in enumerate(self.mc_names):
245            gapsize = _determine_gap(self, e_content, e_name)
246
247            r_length = []
248            for r_name in e_content[e_name]:
249                if isinstance(self.idl[r_name], range):
250                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
251                else:
252                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
253
254            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
255            w_max = max(r_length) // 2
256            e_gamma[e_name] = np.zeros(w_max)
257            self.e_rho[e_name] = np.zeros(w_max)
258            self.e_drho[e_name] = np.zeros(w_max)
259
260            for r_name in e_content[e_name]:
261                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262
263            gamma_div = np.zeros(w_max)
264            for r_name in e_content[e_name]:
265                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
266            gamma_div[gamma_div < 1] = 1.0
267            e_gamma[e_name] /= gamma_div[:w_max]
268
269            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
270                self.e_tauint[e_name] = 0.5
271                self.e_dtauint[e_name] = 0.0
272                self.e_dvalue[e_name] = 0.0
273                self.e_ddvalue[e_name] = 0.0
274                self.e_windowsize[e_name] = 0
275                continue
276
277            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
278            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
279            # Make sure no entry of tauint is smaller than 0.5
280            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
281            # hep-lat/0306017 eq. (42)
282            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
283            self.e_n_dtauint[e_name][0] = 0.0
284
285            def _compute_drho(i):
286                tmp = (self.e_rho[e_name][i + 1:w_max]
287                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
288                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
289                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
290                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
291
292            if self.tau_exp[e_name] > 0:
293                _compute_drho(1)
294                texp = self.tau_exp[e_name]
295                # Critical slowing down analysis
296                if w_max // 2 <= 1:
297                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
298                for n in range(1, w_max // 2):
299                    _compute_drho(n + 1)
300                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
301                        # Bias correction hep-lat/0306017 eq. (49) included
302                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
303                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
304                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
305                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
306                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
307                        self.e_windowsize[e_name] = n
308                        break
309            else:
310                if self.S[e_name] == 0.0:
311                    self.e_tauint[e_name] = 0.5
312                    self.e_dtauint[e_name] = 0.0
313                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
314                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
315                    self.e_windowsize[e_name] = 0
316                else:
317                    # Standard automatic windowing procedure
318                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
319                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
320                    for n in range(1, w_max):
321                        if g_w[n - 1] < 0 or n >= w_max - 1:
322                            _compute_drho(n)
323                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
324                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
325                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
326                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
327                            self.e_windowsize[e_name] = n
328                            break
329
330            self._dvalue += self.e_dvalue[e_name] ** 2
331            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
332
333        for e_name in self.cov_names:
334            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
335            self.e_ddvalue[e_name] = 0
336            self._dvalue += self.e_dvalue[e_name]**2
337
338        self._dvalue = np.sqrt(self._dvalue)
339        if self._dvalue == 0.0:
340            self.ddvalue = 0.0
341        else:
342            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
343        return
344
345    gm = gamma_method
346
347    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
348        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
349           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
350
351        Parameters
352        ----------
353        deltas : list
354            List of fluctuations
355        idx : list
356            List or range of configurations on which the deltas are defined.
357        shape : int
358            Number of configurations in idx.
359        w_max : int
360            Upper bound for the summation window.
361        fft : bool
362            determines whether the fft algorithm is used for the computation
363            of the autocorrelation function.
364        gapsize : int
365            The target distance between two configurations. If longer distances
366            are found in idx, the data is expanded.
367        """
368        gamma = np.zeros(w_max)
369        deltas = _expand_deltas(deltas, idx, shape, gapsize)
370        new_shape = len(deltas)
371        if fft:
372            max_gamma = min(new_shape, w_max)
373            # The padding for the fft has to be even
374            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
375            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
376        else:
377            for n in range(w_max):
378                if new_shape - n >= 0:
379                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
380
381        return gamma
382
383    def details(self, ens_content=True):
384        """Output detailed properties of the Obs.
385
386        Parameters
387        ----------
388        ens_content : bool
389            print details about the ensembles and replica if true.
390        """
391        if self.tag is not None:
392            print("Description:", self.tag)
393        if not hasattr(self, 'e_dvalue'):
394            print('Result\t %3.8e' % (self.value))
395        else:
396            if self.value == 0.0:
397                percentage = np.nan
398            else:
399                percentage = np.abs(self._dvalue / self.value) * 100
400            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
401            if len(self.e_names) > 1:
402                print(' Ensemble errors:')
403            e_content = self.e_content
404            for e_name in self.mc_names:
405                gap = _determine_gap(self, e_content, e_name)
406
407                if len(self.e_names) > 1:
408                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
409                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
410                tau_string += f" in units of {gap} config"
411                if gap > 1:
412                    tau_string += "s"
413                if self.tau_exp[e_name] > 0:
414                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
415                else:
416                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
417                print(tau_string)
418            for e_name in self.cov_names:
419                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
420        if ens_content is True:
421            if len(self.e_names) == 1:
422                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
423            else:
424                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
425            my_string_list = []
426            for key, value in sorted(self.e_content.items()):
427                if key not in self.covobs:
428                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
429                    if len(value) == 1:
430                        my_string += f': {self.shape[value[0]]} configurations'
431                        if isinstance(self.idl[value[0]], range):
432                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
433                        else:
434                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
435                    else:
436                        sublist = []
437                        for v in value:
438                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
439                            my_substring += f': {self.shape[v]} configurations'
440                            if isinstance(self.idl[v], range):
441                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
442                            else:
443                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
444                            sublist.append(my_substring)
445
446                        my_string += '\n' + '\n'.join(sublist)
447                else:
448                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
449                my_string_list.append(my_string)
450            print('\n'.join(my_string_list))
451
452    def reweight(self, weight):
453        """Reweight the obs with given rewighting factors.
454
455        Parameters
456        ----------
457        weight : Obs
458            Reweighting factor. An Observable that has to be defined on a superset of the
459            configurations in obs[i].idl for all i.
460        all_configs : bool
461            if True, the reweighted observables are normalized by the average of
462            the reweighting factor on all configurations in weight.idl and not
463            on the configurations in obs[i].idl. Default False.
464        """
465        return reweight(weight, [self])[0]
466
467    def is_zero_within_error(self, sigma=1):
468        """Checks whether the observable is zero within 'sigma' standard errors.
469
470        Parameters
471        ----------
472        sigma : int
473            Number of standard errors used for the check.
474
475        Works only properly when the gamma method was run.
476        """
477        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
478
479    def is_zero(self, atol=1e-10):
480        """Checks whether the observable is zero within a given tolerance.
481
482        Parameters
483        ----------
484        atol : float
485            Absolute tolerance (for details see numpy documentation).
486        """
487        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
488
489    def plot_tauint(self, save=None):
490        """Plot integrated autocorrelation time for each ensemble.
491
492        Parameters
493        ----------
494        save : str
495            saves the figure to a file named 'save' if.
496        """
497        if not hasattr(self, 'e_dvalue'):
498            raise Exception('Run the gamma method first.')
499
500        for e, e_name in enumerate(self.mc_names):
501            fig = plt.figure()
502            plt.xlabel(r'$W$')
503            plt.ylabel(r'$\tau_\mathrm{int}$')
504            length = int(len(self.e_n_tauint[e_name]))
505            if self.tau_exp[e_name] > 0:
506                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
507                x_help = np.arange(2 * self.tau_exp[e_name])
508                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
509                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
510                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
511                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
512                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
513                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
514                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
515            else:
516                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
517                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
518
519            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
520            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
521            plt.legend()
522            plt.xlim(-0.5, xmax)
523            ylim = plt.ylim()
524            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
525            plt.draw()
526            if save:
527                fig.savefig(save + "_" + str(e))
528
529    def plot_rho(self, save=None):
530        """Plot normalized autocorrelation function time for each ensemble.
531
532        Parameters
533        ----------
534        save : str
535            saves the figure to a file named 'save' if.
536        """
537        if not hasattr(self, 'e_dvalue'):
538            raise Exception('Run the gamma method first.')
539        for e, e_name in enumerate(self.mc_names):
540            fig = plt.figure()
541            plt.xlabel('W')
542            plt.ylabel('rho')
543            length = int(len(self.e_drho[e_name]))
544            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
545            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
546            if self.tau_exp[e_name] > 0:
547                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
548                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
549                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
550                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
551            else:
552                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
553                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
554            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
555            plt.xlim(-0.5, xmax)
556            plt.draw()
557            if save:
558                fig.savefig(save + "_" + str(e))
559
560    def plot_rep_dist(self):
561        """Plot replica distribution for each ensemble with more than one replicum."""
562        if not hasattr(self, 'e_dvalue'):
563            raise Exception('Run the gamma method first.')
564        for e, e_name in enumerate(self.mc_names):
565            if len(self.e_content[e_name]) == 1:
566                print('No replica distribution for a single replicum (', e_name, ')')
567                continue
568            r_length = []
569            sub_r_mean = 0
570            for r, r_name in enumerate(self.e_content[e_name]):
571                r_length.append(len(self.deltas[r_name]))
572                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
573            e_N = np.sum(r_length)
574            sub_r_mean /= e_N
575            arr = np.zeros(len(self.e_content[e_name]))
576            for r, r_name in enumerate(self.e_content[e_name]):
577                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
578            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
579            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
580            plt.draw()
581
582    def plot_history(self, expand=True):
583        """Plot derived Monte Carlo history for each ensemble
584
585        Parameters
586        ----------
587        expand : bool
588            show expanded history for irregular Monte Carlo chains (default: True).
589        """
590        for e, e_name in enumerate(self.mc_names):
591            plt.figure()
592            r_length = []
593            tmp = []
594            tmp_expanded = []
595            for r, r_name in enumerate(self.e_content[e_name]):
596                tmp.append(self.deltas[r_name] + self.r_values[r_name])
597                if expand:
598                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
599                    r_length.append(len(tmp_expanded[-1]))
600                else:
601                    r_length.append(len(tmp[-1]))
602            e_N = np.sum(r_length)
603            x = np.arange(e_N)
604            y_test = np.concatenate(tmp, axis=0)
605            if expand:
606                y = np.concatenate(tmp_expanded, axis=0)
607            else:
608                y = y_test
609            plt.errorbar(x, y, fmt='.', markersize=3)
610            plt.xlim(-0.5, e_N - 0.5)
611            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
612            plt.draw()
613
614    def plot_piechart(self, save=None):
615        """Plot piechart which shows the fractional contribution of each
616        ensemble to the error and returns a dictionary containing the fractions.
617
618        Parameters
619        ----------
620        save : str
621            saves the figure to a file named 'save' if.
622        """
623        if not hasattr(self, 'e_dvalue'):
624            raise Exception('Run the gamma method first.')
625        if np.isclose(0.0, self._dvalue, atol=1e-15):
626            raise ValueError('Error is 0.0')
627        labels = self.e_names
628        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
629        fig1, ax1 = plt.subplots()
630        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
631        ax1.axis('equal')
632        plt.draw()
633        if save:
634            fig1.savefig(save)
635
636        return dict(zip(labels, sizes))
637
638    def dump(self, filename, datatype="json.gz", description="", **kwargs):
639        """Dump the Obs to a file 'name' of chosen format.
640
641        Parameters
642        ----------
643        filename : str
644            name of the file to be saved.
645        datatype : str
646            Format of the exported file. Supported formats include
647            "json.gz" and "pickle"
648        description : str
649            Description for output file, only relevant for json.gz format.
650        path : str
651            specifies a custom path for the file (default '.')
652        """
653        if 'path' in kwargs:
654            file_name = kwargs.get('path') + '/' + filename
655        else:
656            file_name = filename
657
658        if datatype == "json.gz":
659            from .input.json import dump_to_json
660            dump_to_json([self], file_name, description=description)
661        elif datatype == "pickle":
662            with open(file_name + '.p', 'wb') as fb:
663                pickle.dump(self, fb)
664        else:
665            raise TypeError("Unknown datatype " + str(datatype))
666
667    def export_jackknife(self):
668        """Export jackknife samples from the Obs
669
670        Returns
671        -------
672        numpy.ndarray
673            Returns a numpy array of length N + 1 where N is the number of samples
674            for the given ensemble and replicum. The zeroth entry of the array contains
675            the mean value of the Obs, entries 1 to N contain the N jackknife samples
676            derived from the Obs. The current implementation only works for observables
677            defined on exactly one ensemble and replicum. The derived jackknife samples
678            should agree with samples from a full jackknife analysis up to O(1/N).
679        """
680
681        if len(self.names) != 1:
682            raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
683
684        name = self.names[0]
685        full_data = self.deltas[name] + self.r_values[name]
686        n = full_data.size
687        mean = self.value
688        tmp_jacks = np.zeros(n + 1)
689        tmp_jacks[0] = mean
690        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
691        return tmp_jacks
692
693    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
694        """Export bootstrap samples from the Obs
695
696        Parameters
697        ----------
698        samples : int
699            Number of bootstrap samples to generate.
700        random_numbers : np.ndarray
701            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
702            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
703        save_rng : str
704            Save the random numbers to a file if a path is specified.
705
706        Returns
707        -------
708        numpy.ndarray
709            Returns a numpy array of length N + 1 where N is the number of samples
710            for the given ensemble and replicum. The zeroth entry of the array contains
711            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
712            derived from the Obs. The current implementation only works for observables
713            defined on exactly one ensemble and replicum. The derived bootstrap samples
714            should agree with samples from a full bootstrap analysis up to O(1/N).
715        """
716        if len(self.names) != 1:
717            raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
718
719        name = self.names[0]
720        length = self.N
721
722        if random_numbers is None:
723            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
724            rng = np.random.default_rng(seed)
725            random_numbers = rng.integers(0, length, size=(samples, length))
726
727        if save_rng is not None:
728            np.savetxt(save_rng, random_numbers, fmt='%i')
729
730        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
731        ret = np.zeros(samples + 1)
732        ret[0] = self.value
733        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
734        return ret
735
736    def __float__(self):
737        return float(self.value)
738
739    def __repr__(self):
740        return 'Obs[' + str(self) + ']'
741
742    def __str__(self):
743        return _format_uncertainty(self.value, self._dvalue)
744
745    def __format__(self, format_type):
746        if format_type == "":
747            significance = 2
748        else:
749            significance = int(float(format_type.replace("+", "").replace("-", "")))
750        my_str = _format_uncertainty(self.value, self._dvalue,
751                                     significance=significance)
752        for char in ["+", " "]:
753            if format_type.startswith(char):
754                if my_str[0] != "-":
755                    my_str = char + my_str
756        return my_str
757
758    def __hash__(self):
759        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
760        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
761        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
762        hash_tuple += tuple([o.encode() for o in self.names])
763        m = hashlib.md5()
764        [m.update(o) for o in hash_tuple]
765        return int(m.hexdigest(), 16) & 0xFFFFFFFF
766
767    # Overload comparisons
768    def __lt__(self, other):
769        return self.value < other
770
771    def __le__(self, other):
772        return self.value <= other
773
774    def __gt__(self, other):
775        return self.value > other
776
777    def __ge__(self, other):
778        return self.value >= other
779
780    def __eq__(self, other):
781        if other is None:
782            return False
783        return (self - other).is_zero()
784
785    # Overload math operations
786    def __add__(self, y):
787        if isinstance(y, Obs):
788            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
789        else:
790            if isinstance(y, np.ndarray):
791                return np.array([self + o for o in y])
792            elif isinstance(y, complex):
793                return CObs(self, 0) + y
794            elif y.__class__.__name__ in ['Corr', 'CObs']:
795                return NotImplemented
796            else:
797                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
798
799    def __radd__(self, y):
800        return self + y
801
802    def __mul__(self, y):
803        if isinstance(y, Obs):
804            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
805        else:
806            if isinstance(y, np.ndarray):
807                return np.array([self * o for o in y])
808            elif isinstance(y, complex):
809                return CObs(self * y.real, self * y.imag)
810            elif y.__class__.__name__ in ['Corr', 'CObs']:
811                return NotImplemented
812            else:
813                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
814
815    def __rmul__(self, y):
816        return self * y
817
818    def __sub__(self, y):
819        if isinstance(y, Obs):
820            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
821        else:
822            if isinstance(y, np.ndarray):
823                return np.array([self - o for o in y])
824            elif y.__class__.__name__ in ['Corr', 'CObs']:
825                return NotImplemented
826            else:
827                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
828
829    def __rsub__(self, y):
830        return -1 * (self - y)
831
832    def __pos__(self):
833        return self
834
835    def __neg__(self):
836        return -1 * self
837
838    def __truediv__(self, y):
839        if isinstance(y, Obs):
840            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
841        else:
842            if isinstance(y, np.ndarray):
843                return np.array([self / o for o in y])
844            elif y.__class__.__name__ in ['Corr', 'CObs']:
845                return NotImplemented
846            else:
847                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
848
849    def __rtruediv__(self, y):
850        if isinstance(y, Obs):
851            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
852        else:
853            if isinstance(y, np.ndarray):
854                return np.array([o / self for o in y])
855            elif y.__class__.__name__ in ['Corr', 'CObs']:
856                return NotImplemented
857            else:
858                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
859
860    def __pow__(self, y):
861        if isinstance(y, Obs):
862            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
863        else:
864            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])
865
866    def __rpow__(self, y):
867        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])
868
869    def __abs__(self):
870        return derived_observable(lambda x: anp.abs(x[0]), [self])
871
872    # Overload numpy functions
873    def sqrt(self):
874        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
875
876    def log(self):
877        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
878
879    def exp(self):
880        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
881
882    def sin(self):
883        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
884
885    def cos(self):
886        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
887
888    def tan(self):
889        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
890
891    def arcsin(self):
892        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
893
894    def arccos(self):
895        return derived_observable(lambda x: anp.arccos(x[0]), [self])
896
897    def arctan(self):
898        return derived_observable(lambda x: anp.arctan(x[0]), [self])
899
900    def sinh(self):
901        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
902
903    def cosh(self):
904        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
905
906    def tanh(self):
907        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
908
909    def arcsinh(self):
910        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
911
912    def arccosh(self):
913        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
914
915    def arctanh(self):
916        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86                if len(set([o.split('|')[0] for o in names])) > 1:
 87                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
 88            else:
 89                if not isinstance(names[0], str):
 90                    raise TypeError('All names have to be strings.')
 91            if min(len(x) for x in samples) <= 4:
 92                raise ValueError('Samples have to have at least 5 entries.')
 93
 94        self.names = sorted(names)
 95        self.shape = {}
 96        self.r_values = {}
 97        self.deltas = {}
 98        self._covobs = {}
 99
100        self._value = 0
101        self.N = 0
102        self.idl = {}
103        if idl is not None:
104            for name, idx in sorted(zip(names, idl)):
105                if isinstance(idx, range):
106                    self.idl[name] = idx
107                elif isinstance(idx, (list, np.ndarray)):
108                    dc = np.unique(np.diff(idx))
109                    if np.any(dc < 0):
110                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
111                    elif np.any(dc == 0):
112                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
113                    if len(dc) == 1:
114                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
115                    else:
116                        self.idl[name] = list(idx)
117                else:
118                    raise TypeError('incompatible type for idl[%s].' % (name))
119        else:
120            for name, sample in sorted(zip(names, samples)):
121                self.idl[name] = range(1, len(sample) + 1)
122
123        if kwargs.get("means") is not None:
124            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
125                self.shape[name] = len(self.idl[name])
126                self.N += self.shape[name]
127                self.r_values[name] = mean
128                self.deltas[name] = sample
129        else:
130            for name, sample in sorted(zip(names, samples)):
131                self.shape[name] = len(self.idl[name])
132                self.N += self.shape[name]
133                if len(sample) != self.shape[name]:
134                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
135                self.r_values[name] = np.mean(sample)
136                self.deltas[name] = sample - self.r_values[name]
137                self._value += self.shape[name] * self.r_values[name]
138            self._value /= self.N
139
140        self._dvalue = 0.0
141        self.ddvalue = 0.0
142        self.reweighted = False
143
144        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
names
shape
r_values
deltas
N
idl
ddvalue
reweighted
tag
value
146    @property
147    def value(self):
148        return self._value
dvalue
150    @property
151    def dvalue(self):
152        return self._dvalue
e_names
154    @property
155    def e_names(self):
156        return sorted(set([o.split('|')[0] for o in self.names]))
cov_names
158    @property
159    def cov_names(self):
160        return sorted(set([o for o in self.covobs.keys()]))
mc_names
162    @property
163    def mc_names(self):
164        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
e_content
166    @property
167    def e_content(self):
168        res = {}
169        for e, e_name in enumerate(self.e_names):
170            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
171            if e_name in self.names:
172                res[e_name].append(e_name)
173        return res
covobs
175    @property
176    def covobs(self):
177        return self._covobs
def gamma_method(self, **kwargs):
179    def gamma_method(self, **kwargs):
180        """Estimate the error and related properties of the Obs.
181
182        Parameters
183        ----------
184        S : float
185            specifies a custom value for the parameter S (default 2.0).
186            If set to 0 it is assumed that the data exhibits no
187            autocorrelation. In this case the error estimates coincides
188            with the sample standard error.
189        tau_exp : float
190            positive value triggers the critical slowing down analysis
191            (default 0.0).
192        N_sigma : float
193            number of standard deviations from zero until the tail is
194            attached to the autocorrelation function (default 1).
195        fft : bool
196            determines whether the fft algorithm is used for the computation
197            of the autocorrelation function (default True)
198        """
199
200        e_content = self.e_content
201        self.e_dvalue = {}
202        self.e_ddvalue = {}
203        self.e_tauint = {}
204        self.e_dtauint = {}
205        self.e_windowsize = {}
206        self.e_n_tauint = {}
207        self.e_n_dtauint = {}
208        e_gamma = {}
209        self.e_rho = {}
210        self.e_drho = {}
211        self._dvalue = 0
212        self.ddvalue = 0
213
214        self.S = {}
215        self.tau_exp = {}
216        self.N_sigma = {}
217
218        if kwargs.get('fft') is False:
219            fft = False
220        else:
221            fft = True
222
223        def _parse_kwarg(kwarg_name):
224            if kwarg_name in kwargs:
225                tmp = kwargs.get(kwarg_name)
226                if isinstance(tmp, (int, float)):
227                    if tmp < 0:
228                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
229                    for e, e_name in enumerate(self.e_names):
230                        getattr(self, kwarg_name)[e_name] = tmp
231                else:
232                    raise TypeError(kwarg_name + ' is not in proper format.')
233            else:
234                for e, e_name in enumerate(self.e_names):
235                    if e_name in getattr(Obs, kwarg_name + '_dict'):
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
237                    else:
238                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
239
240        _parse_kwarg('S')
241        _parse_kwarg('tau_exp')
242        _parse_kwarg('N_sigma')
243
244        for e, e_name in enumerate(self.mc_names):
245            gapsize = _determine_gap(self, e_content, e_name)
246
247            r_length = []
248            for r_name in e_content[e_name]:
249                if isinstance(self.idl[r_name], range):
250                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
251                else:
252                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
253
254            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
255            w_max = max(r_length) // 2
256            e_gamma[e_name] = np.zeros(w_max)
257            self.e_rho[e_name] = np.zeros(w_max)
258            self.e_drho[e_name] = np.zeros(w_max)
259
260            for r_name in e_content[e_name]:
261                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262
263            gamma_div = np.zeros(w_max)
264            for r_name in e_content[e_name]:
265                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
266            gamma_div[gamma_div < 1] = 1.0
267            e_gamma[e_name] /= gamma_div[:w_max]
268
269            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
270                self.e_tauint[e_name] = 0.5
271                self.e_dtauint[e_name] = 0.0
272                self.e_dvalue[e_name] = 0.0
273                self.e_ddvalue[e_name] = 0.0
274                self.e_windowsize[e_name] = 0
275                continue
276
277            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
278            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
279            # Make sure no entry of tauint is smaller than 0.5
280            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
281            # hep-lat/0306017 eq. (42)
282            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
283            self.e_n_dtauint[e_name][0] = 0.0
284
285            def _compute_drho(i):
286                tmp = (self.e_rho[e_name][i + 1:w_max]
287                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
288                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
289                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
290                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
291
292            if self.tau_exp[e_name] > 0:
293                _compute_drho(1)
294                texp = self.tau_exp[e_name]
295                # Critical slowing down analysis
296                if w_max // 2 <= 1:
297                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
298                for n in range(1, w_max // 2):
299                    _compute_drho(n + 1)
300                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
301                        # Bias correction hep-lat/0306017 eq. (49) included
302                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
303                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
304                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
305                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
306                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
307                        self.e_windowsize[e_name] = n
308                        break
309            else:
310                if self.S[e_name] == 0.0:
311                    self.e_tauint[e_name] = 0.5
312                    self.e_dtauint[e_name] = 0.0
313                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
314                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
315                    self.e_windowsize[e_name] = 0
316                else:
317                    # Standard automatic windowing procedure
318                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
319                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
320                    for n in range(1, w_max):
321                        if g_w[n - 1] < 0 or n >= w_max - 1:
322                            _compute_drho(n)
323                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
324                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
325                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
326                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
327                            self.e_windowsize[e_name] = n
328                            break
329
330            self._dvalue += self.e_dvalue[e_name] ** 2
331            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
332
333        for e_name in self.cov_names:
334            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
335            self.e_ddvalue[e_name] = 0
336            self._dvalue += self.e_dvalue[e_name]**2
337
338        self._dvalue = np.sqrt(self._dvalue)
339        if self._dvalue == 0.0:
340            self.ddvalue = 0.0
341        else:
342            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
343        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
179    def gamma_method(self, **kwargs):
180        """Estimate the error and related properties of the Obs.
181
182        Parameters
183        ----------
184        S : float
185            specifies a custom value for the parameter S (default 2.0).
186            If set to 0 it is assumed that the data exhibits no
187            autocorrelation. In this case the error estimates coincides
188            with the sample standard error.
189        tau_exp : float
190            positive value triggers the critical slowing down analysis
191            (default 0.0).
192        N_sigma : float
193            number of standard deviations from zero until the tail is
194            attached to the autocorrelation function (default 1).
195        fft : bool
196            determines whether the fft algorithm is used for the computation
197            of the autocorrelation function (default True)
198        """
199
200        e_content = self.e_content
201        self.e_dvalue = {}
202        self.e_ddvalue = {}
203        self.e_tauint = {}
204        self.e_dtauint = {}
205        self.e_windowsize = {}
206        self.e_n_tauint = {}
207        self.e_n_dtauint = {}
208        e_gamma = {}
209        self.e_rho = {}
210        self.e_drho = {}
211        self._dvalue = 0
212        self.ddvalue = 0
213
214        self.S = {}
215        self.tau_exp = {}
216        self.N_sigma = {}
217
218        if kwargs.get('fft') is False:
219            fft = False
220        else:
221            fft = True
222
223        def _parse_kwarg(kwarg_name):
224            if kwarg_name in kwargs:
225                tmp = kwargs.get(kwarg_name)
226                if isinstance(tmp, (int, float)):
227                    if tmp < 0:
228                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
229                    for e, e_name in enumerate(self.e_names):
230                        getattr(self, kwarg_name)[e_name] = tmp
231                else:
232                    raise TypeError(kwarg_name + ' is not in proper format.')
233            else:
234                for e, e_name in enumerate(self.e_names):
235                    if e_name in getattr(Obs, kwarg_name + '_dict'):
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
237                    else:
238                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
239
240        _parse_kwarg('S')
241        _parse_kwarg('tau_exp')
242        _parse_kwarg('N_sigma')
243
244        for e, e_name in enumerate(self.mc_names):
245            gapsize = _determine_gap(self, e_content, e_name)
246
247            r_length = []
248            for r_name in e_content[e_name]:
249                if isinstance(self.idl[r_name], range):
250                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
251                else:
252                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
253
254            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
255            w_max = max(r_length) // 2
256            e_gamma[e_name] = np.zeros(w_max)
257            self.e_rho[e_name] = np.zeros(w_max)
258            self.e_drho[e_name] = np.zeros(w_max)
259
260            for r_name in e_content[e_name]:
261                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
262
263            gamma_div = np.zeros(w_max)
264            for r_name in e_content[e_name]:
265                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
266            gamma_div[gamma_div < 1] = 1.0
267            e_gamma[e_name] /= gamma_div[:w_max]
268
269            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
270                self.e_tauint[e_name] = 0.5
271                self.e_dtauint[e_name] = 0.0
272                self.e_dvalue[e_name] = 0.0
273                self.e_ddvalue[e_name] = 0.0
274                self.e_windowsize[e_name] = 0
275                continue
276
277            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
278            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
279            # Make sure no entry of tauint is smaller than 0.5
280            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
281            # hep-lat/0306017 eq. (42)
282            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
283            self.e_n_dtauint[e_name][0] = 0.0
284
285            def _compute_drho(i):
286                tmp = (self.e_rho[e_name][i + 1:w_max]
287                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
288                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
289                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
290                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
291
292            if self.tau_exp[e_name] > 0:
293                _compute_drho(1)
294                texp = self.tau_exp[e_name]
295                # Critical slowing down analysis
296                if w_max // 2 <= 1:
297                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
298                for n in range(1, w_max // 2):
299                    _compute_drho(n + 1)
300                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
301                        # Bias correction hep-lat/0306017 eq. (49) included
302                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
303                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
304                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
305                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
306                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
307                        self.e_windowsize[e_name] = n
308                        break
309            else:
310                if self.S[e_name] == 0.0:
311                    self.e_tauint[e_name] = 0.5
312                    self.e_dtauint[e_name] = 0.0
313                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
314                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
315                    self.e_windowsize[e_name] = 0
316                else:
317                    # Standard automatic windowing procedure
318                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
319                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
320                    for n in range(1, w_max):
321                        if g_w[n - 1] < 0 or n >= w_max - 1:
322                            _compute_drho(n)
323                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
324                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
325                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
326                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
327                            self.e_windowsize[e_name] = n
328                            break
329
330            self._dvalue += self.e_dvalue[e_name] ** 2
331            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
332
333        for e_name in self.cov_names:
334            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
335            self.e_ddvalue[e_name] = 0
336            self._dvalue += self.e_dvalue[e_name]**2
337
338        self._dvalue = np.sqrt(self._dvalue)
339        if self._dvalue == 0.0:
340            self.ddvalue = 0.0
341        else:
342            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
343        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
383    def details(self, ens_content=True):
384        """Output detailed properties of the Obs.
385
386        Parameters
387        ----------
388        ens_content : bool
389            print details about the ensembles and replica if true.
390        """
391        if self.tag is not None:
392            print("Description:", self.tag)
393        if not hasattr(self, 'e_dvalue'):
394            print('Result\t %3.8e' % (self.value))
395        else:
396            if self.value == 0.0:
397                percentage = np.nan
398            else:
399                percentage = np.abs(self._dvalue / self.value) * 100
400            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
401            if len(self.e_names) > 1:
402                print(' Ensemble errors:')
403            e_content = self.e_content
404            for e_name in self.mc_names:
405                gap = _determine_gap(self, e_content, e_name)
406
407                if len(self.e_names) > 1:
408                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
409                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
410                tau_string += f" in units of {gap} config"
411                if gap > 1:
412                    tau_string += "s"
413                if self.tau_exp[e_name] > 0:
414                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
415                else:
416                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
417                print(tau_string)
418            for e_name in self.cov_names:
419                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
420        if ens_content is True:
421            if len(self.e_names) == 1:
422                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
423            else:
424                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
425            my_string_list = []
426            for key, value in sorted(self.e_content.items()):
427                if key not in self.covobs:
428                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
429                    if len(value) == 1:
430                        my_string += f': {self.shape[value[0]]} configurations'
431                        if isinstance(self.idl[value[0]], range):
432                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
433                        else:
434                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
435                    else:
436                        sublist = []
437                        for v in value:
438                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
439                            my_substring += f': {self.shape[v]} configurations'
440                            if isinstance(self.idl[v], range):
441                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
442                            else:
443                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
444                            sublist.append(my_substring)
445
446                        my_string += '\n' + '\n'.join(sublist)
447                else:
448                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
449                my_string_list.append(my_string)
450            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
452    def reweight(self, weight):
453        """Reweight the obs with given rewighting factors.
454
455        Parameters
456        ----------
457        weight : Obs
458            Reweighting factor. An Observable that has to be defined on a superset of the
459            configurations in obs[i].idl for all i.
460        all_configs : bool
461            if True, the reweighted observables are normalized by the average of
462            the reweighting factor on all configurations in weight.idl and not
463            on the configurations in obs[i].idl. Default False.
464        """
465        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
467    def is_zero_within_error(self, sigma=1):
468        """Checks whether the observable is zero within 'sigma' standard errors.
469
470        Parameters
471        ----------
472        sigma : int
473            Number of standard errors used for the check.
474
475        Works only properly when the gamma method was run.
476        """
477        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
479    def is_zero(self, atol=1e-10):
480        """Checks whether the observable is zero within a given tolerance.
481
482        Parameters
483        ----------
484        atol : float
485            Absolute tolerance (for details see numpy documentation).
486        """
487        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
489    def plot_tauint(self, save=None):
490        """Plot integrated autocorrelation time for each ensemble.
491
492        Parameters
493        ----------
494        save : str
495            saves the figure to a file named 'save' if.
496        """
497        if not hasattr(self, 'e_dvalue'):
498            raise Exception('Run the gamma method first.')
499
500        for e, e_name in enumerate(self.mc_names):
501            fig = plt.figure()
502            plt.xlabel(r'$W$')
503            plt.ylabel(r'$\tau_\mathrm{int}$')
504            length = int(len(self.e_n_tauint[e_name]))
505            if self.tau_exp[e_name] > 0:
506                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
507                x_help = np.arange(2 * self.tau_exp[e_name])
508                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
509                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
510                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
511                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
512                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
513                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
514                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
515            else:
516                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
517                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
518
519            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
520            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
521            plt.legend()
522            plt.xlim(-0.5, xmax)
523            ylim = plt.ylim()
524            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
525            plt.draw()
526            if save:
527                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
529    def plot_rho(self, save=None):
530        """Plot normalized autocorrelation function time for each ensemble.
531
532        Parameters
533        ----------
534        save : str
535            saves the figure to a file named 'save' if.
536        """
537        if not hasattr(self, 'e_dvalue'):
538            raise Exception('Run the gamma method first.')
539        for e, e_name in enumerate(self.mc_names):
540            fig = plt.figure()
541            plt.xlabel('W')
542            plt.ylabel('rho')
543            length = int(len(self.e_drho[e_name]))
544            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
545            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
546            if self.tau_exp[e_name] > 0:
547                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
548                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
549                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
550                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
551            else:
552                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
553                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
554            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
555            plt.xlim(-0.5, xmax)
556            plt.draw()
557            if save:
558                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
560    def plot_rep_dist(self):
561        """Plot replica distribution for each ensemble with more than one replicum."""
562        if not hasattr(self, 'e_dvalue'):
563            raise Exception('Run the gamma method first.')
564        for e, e_name in enumerate(self.mc_names):
565            if len(self.e_content[e_name]) == 1:
566                print('No replica distribution for a single replicum (', e_name, ')')
567                continue
568            r_length = []
569            sub_r_mean = 0
570            for r, r_name in enumerate(self.e_content[e_name]):
571                r_length.append(len(self.deltas[r_name]))
572                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
573            e_N = np.sum(r_length)
574            sub_r_mean /= e_N
575            arr = np.zeros(len(self.e_content[e_name]))
576            for r, r_name in enumerate(self.e_content[e_name]):
577                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
578            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
579            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
580            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
582    def plot_history(self, expand=True):
583        """Plot derived Monte Carlo history for each ensemble
584
585        Parameters
586        ----------
587        expand : bool
588            show expanded history for irregular Monte Carlo chains (default: True).
589        """
590        for e, e_name in enumerate(self.mc_names):
591            plt.figure()
592            r_length = []
593            tmp = []
594            tmp_expanded = []
595            for r, r_name in enumerate(self.e_content[e_name]):
596                tmp.append(self.deltas[r_name] + self.r_values[r_name])
597                if expand:
598                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
599                    r_length.append(len(tmp_expanded[-1]))
600                else:
601                    r_length.append(len(tmp[-1]))
602            e_N = np.sum(r_length)
603            x = np.arange(e_N)
604            y_test = np.concatenate(tmp, axis=0)
605            if expand:
606                y = np.concatenate(tmp_expanded, axis=0)
607            else:
608                y = y_test
609            plt.errorbar(x, y, fmt='.', markersize=3)
610            plt.xlim(-0.5, e_N - 0.5)
611            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
612            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
614    def plot_piechart(self, save=None):
615        """Plot piechart which shows the fractional contribution of each
616        ensemble to the error and returns a dictionary containing the fractions.
617
618        Parameters
619        ----------
620        save : str
621            saves the figure to a file named 'save' if.
622        """
623        if not hasattr(self, 'e_dvalue'):
624            raise Exception('Run the gamma method first.')
625        if np.isclose(0.0, self._dvalue, atol=1e-15):
626            raise ValueError('Error is 0.0')
627        labels = self.e_names
628        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
629        fig1, ax1 = plt.subplots()
630        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
631        ax1.axis('equal')
632        plt.draw()
633        if save:
634            fig1.savefig(save)
635
636        return dict(zip(labels, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
638    def dump(self, filename, datatype="json.gz", description="", **kwargs):
639        """Dump the Obs to a file 'name' of chosen format.
640
641        Parameters
642        ----------
643        filename : str
644            name of the file to be saved.
645        datatype : str
646            Format of the exported file. Supported formats include
647            "json.gz" and "pickle"
648        description : str
649            Description for output file, only relevant for json.gz format.
650        path : str
651            specifies a custom path for the file (default '.')
652        """
653        if 'path' in kwargs:
654            file_name = kwargs.get('path') + '/' + filename
655        else:
656            file_name = filename
657
658        if datatype == "json.gz":
659            from .input.json import dump_to_json
660            dump_to_json([self], file_name, description=description)
661        elif datatype == "pickle":
662            with open(file_name + '.p', 'wb') as fb:
663                pickle.dump(self, fb)
664        else:
665            raise TypeError("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
667    def export_jackknife(self):
668        """Export jackknife samples from the Obs
669
670        Returns
671        -------
672        numpy.ndarray
673            Returns a numpy array of length N + 1 where N is the number of samples
674            for the given ensemble and replicum. The zeroth entry of the array contains
675            the mean value of the Obs, entries 1 to N contain the N jackknife samples
676            derived from the Obs. The current implementation only works for observables
677            defined on exactly one ensemble and replicum. The derived jackknife samples
678            should agree with samples from a full jackknife analysis up to O(1/N).
679        """
680
681        if len(self.names) != 1:
682            raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
683
684        name = self.names[0]
685        full_data = self.deltas[name] + self.r_values[name]
686        n = full_data.size
687        mean = self.value
688        tmp_jacks = np.zeros(n + 1)
689        tmp_jacks[0] = mean
690        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
691        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
693    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
694        """Export bootstrap samples from the Obs
695
696        Parameters
697        ----------
698        samples : int
699            Number of bootstrap samples to generate.
700        random_numbers : np.ndarray
701            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
702            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
703        save_rng : str
704            Save the random numbers to a file if a path is specified.
705
706        Returns
707        -------
708        numpy.ndarray
709            Returns a numpy array of length N + 1 where N is the number of samples
710            for the given ensemble and replicum. The zeroth entry of the array contains
711            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
712            derived from the Obs. The current implementation only works for observables
713            defined on exactly one ensemble and replicum. The derived bootstrap samples
714            should agree with samples from a full bootstrap analysis up to O(1/N).
715        """
716        if len(self.names) != 1:
717            raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
718
719        name = self.names[0]
720        length = self.N
721
722        if random_numbers is None:
723            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
724            rng = np.random.default_rng(seed)
725            random_numbers = rng.integers(0, length, size=(samples, length))
726
727        if save_rng is not None:
728            np.savetxt(save_rng, random_numbers, fmt='%i')
729
730        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
731        ret = np.zeros(samples + 1)
732        ret[0] = self.value
733        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
734        return ret

Export bootstrap samples from the Obs

Parameters
  • samples (int): Number of bootstrap samples to generate.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
  • save_rng (str): Save the random numbers to a file if a path is specified.
Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
def sqrt(self):
873    def sqrt(self):
874        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
876    def log(self):
877        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
879    def exp(self):
880        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
882    def sin(self):
883        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
885    def cos(self):
886        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
888    def tan(self):
889        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
891    def arcsin(self):
892        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
894    def arccos(self):
895        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
897    def arctan(self):
898        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
900    def sinh(self):
901        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
903    def cosh(self):
904        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
906    def tanh(self):
907        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
909    def arcsinh(self):
910        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
912    def arccosh(self):
913        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
915    def arctanh(self):
916        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
 919class CObs:
 920    """Class for a complex valued observable."""
 921    __slots__ = ['_real', '_imag', 'tag']
 922
 923    def __init__(self, real, imag=0.0):
 924        self._real = real
 925        self._imag = imag
 926        self.tag = None
 927
 928    @property
 929    def real(self):
 930        return self._real
 931
 932    @property
 933    def imag(self):
 934        return self._imag
 935
 936    def gamma_method(self, **kwargs):
 937        """Executes the gamma_method for the real and the imaginary part."""
 938        if isinstance(self.real, Obs):
 939            self.real.gamma_method(**kwargs)
 940        if isinstance(self.imag, Obs):
 941            self.imag.gamma_method(**kwargs)
 942
 943    def is_zero(self):
 944        """Checks whether both real and imaginary part are zero within machine precision."""
 945        return self.real == 0.0 and self.imag == 0.0
 946
 947    def conjugate(self):
 948        return CObs(self.real, -self.imag)
 949
 950    def __add__(self, other):
 951        if isinstance(other, np.ndarray):
 952            return other + self
 953        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 954            return CObs(self.real + other.real,
 955                        self.imag + other.imag)
 956        else:
 957            return CObs(self.real + other, self.imag)
 958
 959    def __radd__(self, y):
 960        return self + y
 961
 962    def __sub__(self, other):
 963        if isinstance(other, np.ndarray):
 964            return -1 * (other - self)
 965        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 966            return CObs(self.real - other.real, self.imag - other.imag)
 967        else:
 968            return CObs(self.real - other, self.imag)
 969
 970    def __rsub__(self, other):
 971        return -1 * (self - other)
 972
 973    def __mul__(self, other):
 974        if isinstance(other, np.ndarray):
 975            return other * self
 976        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 977            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 978                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 981                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 982                                               [self.real, other.real, self.imag, other.imag],
 983                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 984            elif getattr(other, 'imag', 0) != 0:
 985                return CObs(self.real * other.real - self.imag * other.imag,
 986                            self.imag * other.real + self.real * other.imag)
 987            else:
 988                return CObs(self.real * other.real, self.imag * other.real)
 989        else:
 990            return CObs(self.real * other, self.imag * other)
 991
 992    def __rmul__(self, other):
 993        return self * other
 994
 995    def __truediv__(self, other):
 996        if isinstance(other, np.ndarray):
 997            return 1 / (other / self)
 998        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 999            r = other.real ** 2 + other.imag ** 2
1000            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1001        else:
1002            return CObs(self.real / other, self.imag / other)
1003
1004    def __rtruediv__(self, other):
1005        r = self.real ** 2 + self.imag ** 2
1006        if hasattr(other, 'real') and hasattr(other, 'imag'):
1007            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1008        else:
1009            return CObs(self.real * other / r, -self.imag * other / r)
1010
1011    def __abs__(self):
1012        return np.sqrt(self.real**2 + self.imag**2)
1013
1014    def __pos__(self):
1015        return self
1016
1017    def __neg__(self):
1018        return -1 * self
1019
1020    def __eq__(self, other):
1021        return self.real == other.real and self.imag == other.imag
1022
1023    def __str__(self):
1024        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1025
1026    def __repr__(self):
1027        return 'CObs[' + str(self) + ']'
1028
1029    def __format__(self, format_type):
1030        if format_type == "":
1031            significance = 2
1032            format_type = "2"
1033        else:
1034            significance = int(float(format_type.replace("+", "").replace("-", "")))
1035        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0)
923    def __init__(self, real, imag=0.0):
924        self._real = real
925        self._imag = imag
926        self.tag = None
tag
real
928    @property
929    def real(self):
930        return self._real
imag
932    @property
933    def imag(self):
934        return self._imag
def gamma_method(self, **kwargs):
936    def gamma_method(self, **kwargs):
937        """Executes the gamma_method for the real and the imaginary part."""
938        if isinstance(self.real, Obs):
939            self.real.gamma_method(**kwargs)
940        if isinstance(self.imag, Obs):
941            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
943    def is_zero(self):
944        """Checks whether both real and imaginary part are zero within machine precision."""
945        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
947    def conjugate(self):
948        return CObs(self.real, -self.imag)
def gamma_method(x, **kwargs):
1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs):
1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def derived_observable(func, data, array_mode=False, **kwargs):
1173def derived_observable(func, data, array_mode=False, **kwargs):
1174    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1175
1176    Parameters
1177    ----------
1178    func : object
1179        arbitrary function of the form func(data, **kwargs). For the
1180        automatic differentiation to work, all numpy functions have to have
1181        the autograd wrapper (use 'import autograd.numpy as anp').
1182    data : list
1183        list of Obs, e.g. [obs1, obs2, obs3].
1184    num_grad : bool
1185        if True, numerical derivatives are used instead of autograd
1186        (default False). To control the numerical differentiation the
1187        kwargs of numdifftools.step_generators.MaxStepGenerator
1188        can be used.
1189    man_grad : list
1190        manually supply a list or an array which contains the jacobian
1191        of func. Use cautiously, supplying the wrong derivative will
1192        not be intercepted.
1193
1194    Notes
1195    -----
1196    For simple mathematical operations it can be practical to use anonymous
1197    functions. For the ratio of two observables one can e.g. use
1198
1199    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1200    """
1201
1202    data = np.asarray(data)
1203    raveled_data = data.ravel()
1204
1205    # Workaround for matrix operations containing non Obs data
1206    if not all(isinstance(x, Obs) for x in raveled_data):
1207        for i in range(len(raveled_data)):
1208            if isinstance(raveled_data[i], (int, float)):
1209                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1210
1211    allcov = {}
1212    for o in raveled_data:
1213        for name in o.cov_names:
1214            if name in allcov:
1215                if not np.allclose(allcov[name], o.covobs[name].cov):
1216                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1217            else:
1218                allcov[name] = o.covobs[name].cov
1219
1220    n_obs = len(raveled_data)
1221    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1222    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1223    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1224
1225    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1226
1227    if data.ndim == 1:
1228        values = np.array([o.value for o in data])
1229    else:
1230        values = np.vectorize(lambda x: x.value)(data)
1231
1232    new_values = func(values, **kwargs)
1233
1234    multi = int(isinstance(new_values, np.ndarray))
1235
1236    new_r_values = {}
1237    new_idl_d = {}
1238    for name in new_sample_names:
1239        idl = []
1240        tmp_values = np.zeros(n_obs)
1241        for i, item in enumerate(raveled_data):
1242            tmp_values[i] = item.r_values.get(name, item.value)
1243            tmp_idl = item.idl.get(name)
1244            if tmp_idl is not None:
1245                idl.append(tmp_idl)
1246        if multi > 0:
1247            tmp_values = np.array(tmp_values).reshape(data.shape)
1248        new_r_values[name] = func(tmp_values, **kwargs)
1249        new_idl_d[name] = _merge_idx(idl)
1250
1251    def _compute_scalefactor_missing_rep(obs):
1252        """
1253        Computes the scale factor that is to be multiplied with the deltas
1254        in the case where Obs with different subsets of replica are merged.
1255        Returns a dictionary with the scale factor for each Monte Carlo name.
1256
1257        Parameters
1258        ----------
1259        obs : Obs
1260            The observable corresponding to the deltas that are to be scaled
1261        """
1262        scalef_d = {}
1263        for mc_name in obs.mc_names:
1264            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1265            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1266            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1267                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1268        return scalef_d
1269
1270    if 'man_grad' in kwargs:
1271        deriv = np.asarray(kwargs.get('man_grad'))
1272        if new_values.shape + data.shape != deriv.shape:
1273            raise ValueError('Manual derivative does not have correct shape.')
1274    elif kwargs.get('num_grad') is True:
1275        if multi > 0:
1276            raise Exception('Multi mode currently not supported for numerical derivative')
1277        options = {
1278            'base_step': 0.1,
1279            'step_ratio': 2.5}
1280        for key in options.keys():
1281            kwarg = kwargs.get(key)
1282            if kwarg is not None:
1283                options[key] = kwarg
1284        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1285        if tmp_df.size == 1:
1286            deriv = np.array([tmp_df.real])
1287        else:
1288            deriv = tmp_df.real
1289    else:
1290        deriv = jacobian(func)(values, **kwargs)
1291
1292    final_result = np.zeros(new_values.shape, dtype=object)
1293
1294    if array_mode is True:
1295
1296        class _Zero_grad():
1297            def __init__(self, N):
1298                self.grad = np.zeros((N, 1))
1299
1300        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1301        d_extracted = {}
1302        g_extracted = {}
1303        for name in new_sample_names:
1304            d_extracted[name] = []
1305            ens_length = len(new_idl_d[name])
1306            for i_dat, dat in enumerate(data):
1307                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1308        for name in new_cov_names:
1309            g_extracted[name] = []
1310            zero_grad = _Zero_grad(new_covobs_lengths[name])
1311            for i_dat, dat in enumerate(data):
1312                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1313
1314    for i_val, new_val in np.ndenumerate(new_values):
1315        new_deltas = {}
1316        new_grad = {}
1317        if array_mode is True:
1318            for name in new_sample_names:
1319                ens_length = d_extracted[name][0].shape[-1]
1320                new_deltas[name] = np.zeros(ens_length)
1321                for i_dat, dat in enumerate(d_extracted[name]):
1322                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1323            for name in new_cov_names:
1324                new_grad[name] = 0
1325                for i_dat, dat in enumerate(g_extracted[name]):
1326                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1327        else:
1328            for j_obs, obs in np.ndenumerate(data):
1329                scalef_d = _compute_scalefactor_missing_rep(obs)
1330                for name in obs.names:
1331                    if name in obs.cov_names:
1332                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1333                    else:
1334                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1335
1336        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1337
1338        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1339            raise ValueError('The same name has been used for deltas and covobs!')
1340        new_samples = []
1341        new_means = []
1342        new_idl = []
1343        new_names_obs = []
1344        for name in new_names:
1345            if name not in new_covobs:
1346                new_samples.append(new_deltas[name])
1347                new_idl.append(new_idl_d[name])
1348                new_means.append(new_r_values[name][i_val])
1349                new_names_obs.append(name)
1350        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1351        for name in new_covobs:
1352            final_result[i_val].names.append(name)
1353        final_result[i_val]._covobs = new_covobs
1354        final_result[i_val]._value = new_val
1355        final_result[i_val].reweighted = reweighted
1356
1357    if multi == 0:
1358        final_result = final_result.item()
1359
1360    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1392def reweight(weight, obs, **kwargs):
1393    """Reweight a list of observables.
1394
1395    Parameters
1396    ----------
1397    weight : Obs
1398        Reweighting factor. An Observable that has to be defined on a superset of the
1399        configurations in obs[i].idl for all i.
1400    obs : list
1401        list of Obs, e.g. [obs1, obs2, obs3].
1402    all_configs : bool
1403        if True, the reweighted observables are normalized by the average of
1404        the reweighting factor on all configurations in weight.idl and not
1405        on the configurations in obs[i].idl. Default False.
1406    """
1407    result = []
1408    for i in range(len(obs)):
1409        if len(obs[i].cov_names):
1410            raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
1411        if not set(obs[i].names).issubset(weight.names):
1412            raise ValueError('Error: Ensembles do not fit')
1413        if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1414            raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
1415        for name in obs[i].names:
1416            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1417                raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1418        new_samples = []
1419        w_deltas = {}
1420        for name in sorted(obs[i].names):
1421            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1422            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1423        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1424
1425        if kwargs.get('all_configs'):
1426            new_weight = weight
1427        else:
1428            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1429
1430        result.append(tmp_obs / new_weight)
1431        result[-1].reweighted = True
1432
1433    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1436def correlate(obs_a, obs_b):
1437    """Correlate two observables.
1438
1439    Parameters
1440    ----------
1441    obs_a : Obs
1442        First observable
1443    obs_b : Obs
1444        Second observable
1445
1446    Notes
1447    -----
1448    Keep in mind to only correlate primary observables which have not been reweighted
1449    yet. The reweighting has to be applied after correlating the observables.
1450    Only works if a single ensemble is present in the Obs.
1451    Currently only works if ensemble content is identical (this is not strictly necessary).
1452    """
1453
1454    if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1455        raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
1456    if sorted(obs_a.names) != sorted(obs_b.names):
1457        raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1458    if len(obs_a.cov_names) or len(obs_b.cov_names):
1459        raise ValueError('Error: Not possible to correlate Obs that contain covobs!')
1460    for name in obs_a.names:
1461        if obs_a.shape[name] != obs_b.shape[name]:
1462            raise ValueError('Shapes of ensemble', name, 'do not fit')
1463        if obs_a.idl[name] != obs_b.idl[name]:
1464            raise ValueError('idl of ensemble', name, 'do not fit')
1465
1466    if obs_a.reweighted is True:
1467        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1468    if obs_b.reweighted is True:
1469        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1470
1471    new_samples = []
1472    new_idl = []
1473    for name in sorted(obs_a.names):
1474        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1475        new_idl.append(obs_a.idl[name])
1476
1477    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1478    o.reweighted = obs_a.reweighted or obs_b.reweighted
1479    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Only works if a single ensemble is present in the Obs. Currently only works if ensemble content is identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1482def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1483    r'''Calculates the error covariance matrix of a set of observables.
1484
1485    WARNING: This function should be used with care, especially for observables with support on multiple
1486             ensembles with differing autocorrelations. See the notes below for details.
1487
1488    The gamma method has to be applied first to all observables.
1489
1490    Parameters
1491    ----------
1492    obs : list or numpy.ndarray
1493        List or one dimensional array of Obs
1494    visualize : bool
1495        If True plots the corresponding normalized correlation matrix (default False).
1496    correlation : bool
1497        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1498    smooth : None or int
1499        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1500        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1501        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1502        small ones.
1503
1504    Notes
1505    -----
1506    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1507    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1508    in the absence of autocorrelation.
1509    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1510    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1511    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1512    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1513    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1514    '''
1515
1516    length = len(obs)
1517
1518    max_samples = np.max([o.N for o in obs])
1519    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1520        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1521
1522    cov = np.zeros((length, length))
1523    for i in range(length):
1524        for j in range(i, length):
1525            cov[i, j] = _covariance_element(obs[i], obs[j])
1526    cov = cov + cov.T - np.diag(np.diag(cov))
1527
1528    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1529
1530    if isinstance(smooth, int):
1531        corr = _smooth_eigenvalues(corr, smooth)
1532
1533    if visualize:
1534        plt.matshow(corr, vmin=-1, vmax=1)
1535        plt.set_cmap('RdBu')
1536        plt.colorbar()
1537        plt.draw()
1538
1539    if correlation is True:
1540        return corr
1541
1542    errors = [o.dvalue for o in obs]
1543    cov = np.diag(errors) @ corr @ np.diag(errors)
1544
1545    eigenvalues = np.linalg.eigh(cov)[0]
1546    if not np.all(eigenvalues >= 0):
1547        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1548
1549    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def invert_corr_cov_cholesky(corr, inverrdiag):
1552def invert_corr_cov_cholesky(corr, inverrdiag):
1553    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1554       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1555
1556    Parameters
1557    ----------
1558    corr : np.ndarray
1559           correlation matrix
1560    inverrdiag : np.ndarray
1561              diagonal matrix, the entries are the inverse errors of the data points considered
1562    """
1563
1564    condn = np.linalg.cond(corr)
1565    if condn > 0.1 / np.finfo(float).eps:
1566        raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1567    if condn > 1e13:
1568        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
1569    chol = np.linalg.cholesky(corr)
1570    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1571
1572    return chol_inv

Constructs a lower triangular matrix chol via the Cholesky decomposition of the correlation matrix corr and then returns the inverse covariance matrix chol_inv as a lower triangular matrix by solving chol * x = inverrdiag.

Parameters
  • corr (np.ndarray): correlation matrix
  • inverrdiag (np.ndarray): diagonal matrix, the entries are the inverse errors of the data points considered
def sort_corr(corr, kl, yd):
1575def sort_corr(corr, kl, yd):
1576    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1577
1578    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1579    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1580    that the correlation matrix is based on.
1581    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1582    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1583    to the y data `yd` when arranged in an alphabetical order by its keys.
1584
1585    Parameters
1586    ----------
1587    corr : np.ndarray
1588        A square correlation matrix constructed using the order of the y data specified by `kl`.
1589        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1590    kl : list of str
1591        A list of keys that denotes the order in which the y data from `yd` was used to build the
1592        input correlation matrix `corr`.
1593    yd : dict of list
1594        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1595        y data points. The total number of y data points across all keys must match the dimensions
1596        of `corr`. The lists in the dictionary can be lists of Obs.
1597
1598    Returns
1599    -------
1600    np.ndarray
1601        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1602
1603    Example
1604    -------
1605    >>> import numpy as np
1606    >>> import pyerrors as pe
1607    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1608    >>> kl = ['b', 'a']
1609    >>> yd = {'a': [1, 2], 'b': [3]}
1610    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1611    >>> print(sorted_corr)
1612    array([[1. , 0.3, 0.4],
1613           [0.3, 1. , 0.2],
1614           [0.4, 0.2, 1. ]])
1615
1616    """
1617    kl_sorted = sorted(kl)
1618
1619    posd = {}
1620    ofs = 0
1621    for ki, k in enumerate(kl):
1622        posd[k] = [i + ofs for i in range(len(yd[k]))]
1623        ofs += len(posd[k])
1624
1625    mapping = []
1626    for k in kl_sorted:
1627        for i in range(len(yd[k])):
1628            mapping.append(posd[k][i])
1629
1630    corr_sorted = np.zeros_like(corr)
1631    for i in range(corr.shape[0]):
1632        for j in range(corr.shape[0]):
1633            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1634
1635    return corr_sorted

Reorders a correlation matrix to match the alphabetical order of its underlying y data.

The ordering of the input correlation matrix corr is given by the list of keys kl. The input dictionary yd (with the same keys kl) must contain the corresponding y data that the correlation matrix is based on. This function sorts the list of keys kl alphabetically and sorts the matrix corr according to this alphabetical order such that the sorted matrix corr_sorted corresponds to the y data yd when arranged in an alphabetical order by its keys.

Parameters
  • corr (np.ndarray): A square correlation matrix constructed using the order of the y data specified by kl. The dimensions of corr should match the total number of y data points in yd combined.
  • kl (list of str): A list of keys that denotes the order in which the y data from yd was used to build the input correlation matrix corr.
  • yd (dict of list): A dictionary where each key corresponds to a unique identifier, and its value is a list of y data points. The total number of y data points across all keys must match the dimensions of corr. The lists in the dictionary can be lists of Obs.
Returns
  • np.ndarray: A new, sorted correlation matrix that corresponds to the y data from yd when arranged alphabetically by its keys.
Example
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
       [0.3, 1. , 0.2],
       [0.4, 0.2, 1. ]])
def import_jackknife(jacks, name, idl=None):
1715def import_jackknife(jacks, name, idl=None):
1716    """Imports jackknife samples and returns an Obs
1717
1718    Parameters
1719    ----------
1720    jacks : numpy.ndarray
1721        numpy array containing the mean value as zeroth entry and
1722        the N jackknife samples as first to Nth entry.
1723    name : str
1724        name of the ensemble the samples are defined on.
1725    """
1726    length = len(jacks) - 1
1727    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1728    samples = jacks[1:] @ prj
1729    mean = np.mean(samples)
1730    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1731    new_obs._value = jacks[0]
1732    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def import_bootstrap(boots, name, random_numbers):
1735def import_bootstrap(boots, name, random_numbers):
1736    """Imports bootstrap samples and returns an Obs
1737
1738    Parameters
1739    ----------
1740    boots : numpy.ndarray
1741        numpy array containing the mean value as zeroth entry and
1742        the N bootstrap samples as first to Nth entry.
1743    name : str
1744        name of the ensemble the samples are defined on.
1745    random_numbers : np.ndarray
1746        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1747        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1748        chain to be reconstructed.
1749    """
1750    samples, length = random_numbers.shape
1751    if samples != len(boots) - 1:
1752        raise ValueError("Random numbers do not have the correct shape.")
1753
1754    if samples < length:
1755        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1756
1757    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1758
1759    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1760    ret = Obs([samples], [name])
1761    ret._value = boots[0]
1762    return ret

Imports bootstrap samples and returns an Obs

Parameters
  • boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
def merge_obs(list_of_obs):
1765def merge_obs(list_of_obs):
1766    """Combine all observables in list_of_obs into one new observable.
1767    This allows to merge Obs that have been computed on multiple replica
1768    of the same ensemble.
1769    If you like to merge Obs that are based on several ensembles, please
1770    average them yourself.
1771
1772    Parameters
1773    ----------
1774    list_of_obs : list
1775        list of the Obs object to be combined
1776
1777    Notes
1778    -----
1779    It is not possible to combine obs which are based on the same replicum
1780    """
1781    replist = [item for obs in list_of_obs for item in obs.names]
1782    if (len(replist) == len(set(replist))) is False:
1783        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
1784    if any([len(o.cov_names) for o in list_of_obs]):
1785        raise ValueError('Not possible to merge data that contains covobs!')
1786    new_dict = {}
1787    idl_dict = {}
1788    for o in list_of_obs:
1789        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1790                        for key in set(o.deltas) | set(o.r_values)})
1791        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1792
1793    names = sorted(new_dict.keys())
1794    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1795    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1796    return o

Combine all observables in list_of_obs into one new observable. This allows to merge Obs that have been computed on multiple replica of the same ensemble. If you like to merge Obs that are based on several ensembles, please average them yourself.

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1799def cov_Obs(means, cov, name, grad=None):
1800    """Create an Obs based on mean(s) and a covariance matrix
1801
1802    Parameters
1803    ----------
1804    mean : list of floats or float
1805        N mean value(s) of the new Obs
1806    cov : list or array
1807        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1808    name : str
1809        identifier for the covariance matrix
1810    grad : list or array
1811        Gradient of the Covobs wrt. the means belonging to cov.
1812    """
1813
1814    def covobs_to_obs(co):
1815        """Make an Obs out of a Covobs
1816
1817        Parameters
1818        ----------
1819        co : Covobs
1820            Covobs to be embedded into the Obs
1821        """
1822        o = Obs([], [], means=[])
1823        o._value = co.value
1824        o.names.append(co.name)
1825        o._covobs[co.name] = co
1826        o._dvalue = np.sqrt(co.errsq())
1827        return o
1828
1829    ol = []
1830    if isinstance(means, (float, int)):
1831        means = [means]
1832
1833    for i in range(len(means)):
1834        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1835    if ol[0].covobs[name].N != len(means):
1836        raise ValueError('You have to provide %d mean values!' % (ol[0].N))
1837    if len(ol) == 1:
1838        return ol[0]
1839    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.