pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 108                    if len(dc) == 1:
 109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 110                    else:
 111                        self.idl[name] = list(idx)
 112                else:
 113                    raise TypeError('incompatible type for idl[%s].' % (name))
 114        else:
 115            for name, sample in sorted(zip(names, samples)):
 116                self.idl[name] = range(1, len(sample) + 1)
 117
 118        if kwargs.get("means") is not None:
 119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 120                self.shape[name] = len(self.idl[name])
 121                self.N += self.shape[name]
 122                self.r_values[name] = mean
 123                self.deltas[name] = sample
 124        else:
 125            for name, sample in sorted(zip(names, samples)):
 126                self.shape[name] = len(self.idl[name])
 127                self.N += self.shape[name]
 128                if len(sample) != self.shape[name]:
 129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 130                self.r_values[name] = np.mean(sample)
 131                self.deltas[name] = sample - self.r_values[name]
 132                self._value += self.shape[name] * self.r_values[name]
 133            self._value /= self.N
 134
 135        self._dvalue = 0.0
 136        self.ddvalue = 0.0
 137        self.reweighted = False
 138
 139        self.tag = None
 140
 141    @property
 142    def value(self):
 143        return self._value
 144
 145    @property
 146    def dvalue(self):
 147        return self._dvalue
 148
 149    @property
 150    def e_names(self):
 151        return sorted(set([o.split('|')[0] for o in self.names]))
 152
 153    @property
 154    def cov_names(self):
 155        return sorted(set([o for o in self.covobs.keys()]))
 156
 157    @property
 158    def mc_names(self):
 159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 160
 161    @property
 162    def e_content(self):
 163        res = {}
 164        for e, e_name in enumerate(self.e_names):
 165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 166            if e_name in self.names:
 167                res[e_name].append(e_name)
 168        return res
 169
 170    @property
 171    def covobs(self):
 172        return self._covobs
 173
 174    def gamma_method(self, **kwargs):
 175        """Estimate the error and related properties of the Obs.
 176
 177        Parameters
 178        ----------
 179        S : float
 180            specifies a custom value for the parameter S (default 2.0).
 181            If set to 0 it is assumed that the data exhibits no
 182            autocorrelation. In this case the error estimates coincides
 183            with the sample standard error.
 184        tau_exp : float
 185            positive value triggers the critical slowing down analysis
 186            (default 0.0).
 187        N_sigma : float
 188            number of standard deviations from zero until the tail is
 189            attached to the autocorrelation function (default 1).
 190        fft : bool
 191            determines whether the fft algorithm is used for the computation
 192            of the autocorrelation function (default True)
 193        """
 194
 195        e_content = self.e_content
 196        self.e_dvalue = {}
 197        self.e_ddvalue = {}
 198        self.e_tauint = {}
 199        self.e_dtauint = {}
 200        self.e_windowsize = {}
 201        self.e_n_tauint = {}
 202        self.e_n_dtauint = {}
 203        e_gamma = {}
 204        self.e_rho = {}
 205        self.e_drho = {}
 206        self._dvalue = 0
 207        self.ddvalue = 0
 208
 209        self.S = {}
 210        self.tau_exp = {}
 211        self.N_sigma = {}
 212
 213        if kwargs.get('fft') is False:
 214            fft = False
 215        else:
 216            fft = True
 217
 218        def _parse_kwarg(kwarg_name):
 219            if kwarg_name in kwargs:
 220                tmp = kwargs.get(kwarg_name)
 221                if isinstance(tmp, (int, float)):
 222                    if tmp < 0:
 223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 224                    for e, e_name in enumerate(self.e_names):
 225                        getattr(self, kwarg_name)[e_name] = tmp
 226                else:
 227                    raise TypeError(kwarg_name + ' is not in proper format.')
 228            else:
 229                for e, e_name in enumerate(self.e_names):
 230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 232                    else:
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 234
 235        _parse_kwarg('S')
 236        _parse_kwarg('tau_exp')
 237        _parse_kwarg('N_sigma')
 238
 239        for e, e_name in enumerate(self.mc_names):
 240            gapsize = _determine_gap(self, e_content, e_name)
 241
 242            r_length = []
 243            for r_name in e_content[e_name]:
 244                if isinstance(self.idl[r_name], range):
 245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 246                else:
 247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 248
 249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 250            w_max = max(r_length) // 2
 251            e_gamma[e_name] = np.zeros(w_max)
 252            self.e_rho[e_name] = np.zeros(w_max)
 253            self.e_drho[e_name] = np.zeros(w_max)
 254
 255            for r_name in e_content[e_name]:
 256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 257
 258            gamma_div = np.zeros(w_max)
 259            for r_name in e_content[e_name]:
 260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 261            gamma_div[gamma_div < 1] = 1.0
 262            e_gamma[e_name] /= gamma_div[:w_max]
 263
 264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 265                self.e_tauint[e_name] = 0.5
 266                self.e_dtauint[e_name] = 0.0
 267                self.e_dvalue[e_name] = 0.0
 268                self.e_ddvalue[e_name] = 0.0
 269                self.e_windowsize[e_name] = 0
 270                continue
 271
 272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 274            # Make sure no entry of tauint is smaller than 0.5
 275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 276            # hep-lat/0306017 eq. (42)
 277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 278            self.e_n_dtauint[e_name][0] = 0.0
 279
 280            def _compute_drho(i):
 281                tmp = (self.e_rho[e_name][i + 1:w_max]
 282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 286
 287            if self.tau_exp[e_name] > 0:
 288                _compute_drho(1)
 289                texp = self.tau_exp[e_name]
 290                # Critical slowing down analysis
 291                if w_max // 2 <= 1:
 292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 293                for n in range(1, w_max // 2):
 294                    _compute_drho(n + 1)
 295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 296                        # Bias correction hep-lat/0306017 eq. (49) included
 297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 302                        self.e_windowsize[e_name] = n
 303                        break
 304            else:
 305                if self.S[e_name] == 0.0:
 306                    self.e_tauint[e_name] = 0.5
 307                    self.e_dtauint[e_name] = 0.0
 308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 310                    self.e_windowsize[e_name] = 0
 311                else:
 312                    # Standard automatic windowing procedure
 313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 315                    for n in range(1, w_max):
 316                        if g_w[n - 1] < 0 or n >= w_max - 1:
 317                            _compute_drho(n)
 318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 322                            self.e_windowsize[e_name] = n
 323                            break
 324
 325            self._dvalue += self.e_dvalue[e_name] ** 2
 326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 327
 328        for e_name in self.cov_names:
 329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 330            self.e_ddvalue[e_name] = 0
 331            self._dvalue += self.e_dvalue[e_name]**2
 332
 333        self._dvalue = np.sqrt(self._dvalue)
 334        if self._dvalue == 0.0:
 335            self.ddvalue = 0.0
 336        else:
 337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 338        return
 339
 340    gm = gamma_method
 341
 342    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 343        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 344           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 345
 346        Parameters
 347        ----------
 348        deltas : list
 349            List of fluctuations
 350        idx : list
 351            List or range of configurations on which the deltas are defined.
 352        shape : int
 353            Number of configurations in idx.
 354        w_max : int
 355            Upper bound for the summation window.
 356        fft : bool
 357            determines whether the fft algorithm is used for the computation
 358            of the autocorrelation function.
 359        gapsize : int
 360            The target distance between two configurations. If longer distances
 361            are found in idx, the data is expanded.
 362        """
 363        gamma = np.zeros(w_max)
 364        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 365        new_shape = len(deltas)
 366        if fft:
 367            max_gamma = min(new_shape, w_max)
 368            # The padding for the fft has to be even
 369            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 370            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 371        else:
 372            for n in range(w_max):
 373                if new_shape - n >= 0:
 374                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 375
 376        return gamma
 377
 378    def details(self, ens_content=True):
 379        """Output detailed properties of the Obs.
 380
 381        Parameters
 382        ----------
 383        ens_content : bool
 384            print details about the ensembles and replica if true.
 385        """
 386        if self.tag is not None:
 387            print("Description:", self.tag)
 388        if not hasattr(self, 'e_dvalue'):
 389            print('Result\t %3.8e' % (self.value))
 390        else:
 391            if self.value == 0.0:
 392                percentage = np.nan
 393            else:
 394                percentage = np.abs(self._dvalue / self.value) * 100
 395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 396            if len(self.e_names) > 1:
 397                print(' Ensemble errors:')
 398            e_content = self.e_content
 399            for e_name in self.mc_names:
 400                gap = _determine_gap(self, e_content, e_name)
 401
 402                if len(self.e_names) > 1:
 403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 405                tau_string += f" in units of {gap} config"
 406                if gap > 1:
 407                    tau_string += "s"
 408                if self.tau_exp[e_name] > 0:
 409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 410                else:
 411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 412                print(tau_string)
 413            for e_name in self.cov_names:
 414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 415        if ens_content is True:
 416            if len(self.e_names) == 1:
 417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 418            else:
 419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 420            my_string_list = []
 421            for key, value in sorted(self.e_content.items()):
 422                if key not in self.covobs:
 423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 424                    if len(value) == 1:
 425                        my_string += f': {self.shape[value[0]]} configurations'
 426                        if isinstance(self.idl[value[0]], range):
 427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 428                        else:
 429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 430                    else:
 431                        sublist = []
 432                        for v in value:
 433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 434                            my_substring += f': {self.shape[v]} configurations'
 435                            if isinstance(self.idl[v], range):
 436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 437                            else:
 438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 439                            sublist.append(my_substring)
 440
 441                        my_string += '\n' + '\n'.join(sublist)
 442                else:
 443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 444                my_string_list.append(my_string)
 445            print('\n'.join(my_string_list))
 446
 447    def reweight(self, weight):
 448        """Reweight the obs with given rewighting factors.
 449
 450        Parameters
 451        ----------
 452        weight : Obs
 453            Reweighting factor. An Observable that has to be defined on a superset of the
 454            configurations in obs[i].idl for all i.
 455        all_configs : bool
 456            if True, the reweighted observables are normalized by the average of
 457            the reweighting factor on all configurations in weight.idl and not
 458            on the configurations in obs[i].idl. Default False.
 459        """
 460        return reweight(weight, [self])[0]
 461
 462    def is_zero_within_error(self, sigma=1):
 463        """Checks whether the observable is zero within 'sigma' standard errors.
 464
 465        Parameters
 466        ----------
 467        sigma : int
 468            Number of standard errors used for the check.
 469
 470        Works only properly when the gamma method was run.
 471        """
 472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 473
 474    def is_zero(self, atol=1e-10):
 475        """Checks whether the observable is zero within a given tolerance.
 476
 477        Parameters
 478        ----------
 479        atol : float
 480            Absolute tolerance (for details see numpy documentation).
 481        """
 482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 483
 484    def plot_tauint(self, save=None):
 485        """Plot integrated autocorrelation time for each ensemble.
 486
 487        Parameters
 488        ----------
 489        save : str
 490            saves the figure to a file named 'save' if.
 491        """
 492        if not hasattr(self, 'e_dvalue'):
 493            raise Exception('Run the gamma method first.')
 494
 495        for e, e_name in enumerate(self.mc_names):
 496            fig = plt.figure()
 497            plt.xlabel(r'$W$')
 498            plt.ylabel(r'$\tau_\mathrm{int}$')
 499            length = int(len(self.e_n_tauint[e_name]))
 500            if self.tau_exp[e_name] > 0:
 501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 502                x_help = np.arange(2 * self.tau_exp[e_name])
 503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 510            else:
 511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 513
 514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 516            plt.legend()
 517            plt.xlim(-0.5, xmax)
 518            ylim = plt.ylim()
 519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 520            plt.draw()
 521            if save:
 522                fig.savefig(save + "_" + str(e))
 523
 524    def plot_rho(self, save=None):
 525        """Plot normalized autocorrelation function time for each ensemble.
 526
 527        Parameters
 528        ----------
 529        save : str
 530            saves the figure to a file named 'save' if.
 531        """
 532        if not hasattr(self, 'e_dvalue'):
 533            raise Exception('Run the gamma method first.')
 534        for e, e_name in enumerate(self.mc_names):
 535            fig = plt.figure()
 536            plt.xlabel('W')
 537            plt.ylabel('rho')
 538            length = int(len(self.e_drho[e_name]))
 539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 541            if self.tau_exp[e_name] > 0:
 542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 546            else:
 547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 550            plt.xlim(-0.5, xmax)
 551            plt.draw()
 552            if save:
 553                fig.savefig(save + "_" + str(e))
 554
 555    def plot_rep_dist(self):
 556        """Plot replica distribution for each ensemble with more than one replicum."""
 557        if not hasattr(self, 'e_dvalue'):
 558            raise Exception('Run the gamma method first.')
 559        for e, e_name in enumerate(self.mc_names):
 560            if len(self.e_content[e_name]) == 1:
 561                print('No replica distribution for a single replicum (', e_name, ')')
 562                continue
 563            r_length = []
 564            sub_r_mean = 0
 565            for r, r_name in enumerate(self.e_content[e_name]):
 566                r_length.append(len(self.deltas[r_name]))
 567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 568            e_N = np.sum(r_length)
 569            sub_r_mean /= e_N
 570            arr = np.zeros(len(self.e_content[e_name]))
 571            for r, r_name in enumerate(self.e_content[e_name]):
 572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 575            plt.draw()
 576
 577    def plot_history(self, expand=True):
 578        """Plot derived Monte Carlo history for each ensemble
 579
 580        Parameters
 581        ----------
 582        expand : bool
 583            show expanded history for irregular Monte Carlo chains (default: True).
 584        """
 585        for e, e_name in enumerate(self.mc_names):
 586            plt.figure()
 587            r_length = []
 588            tmp = []
 589            tmp_expanded = []
 590            for r, r_name in enumerate(self.e_content[e_name]):
 591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 592                if expand:
 593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 594                    r_length.append(len(tmp_expanded[-1]))
 595                else:
 596                    r_length.append(len(tmp[-1]))
 597            e_N = np.sum(r_length)
 598            x = np.arange(e_N)
 599            y_test = np.concatenate(tmp, axis=0)
 600            if expand:
 601                y = np.concatenate(tmp_expanded, axis=0)
 602            else:
 603                y = y_test
 604            plt.errorbar(x, y, fmt='.', markersize=3)
 605            plt.xlim(-0.5, e_N - 0.5)
 606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 607            plt.draw()
 608
 609    def plot_piechart(self, save=None):
 610        """Plot piechart which shows the fractional contribution of each
 611        ensemble to the error and returns a dictionary containing the fractions.
 612
 613        Parameters
 614        ----------
 615        save : str
 616            saves the figure to a file named 'save' if.
 617        """
 618        if not hasattr(self, 'e_dvalue'):
 619            raise Exception('Run the gamma method first.')
 620        if np.isclose(0.0, self._dvalue, atol=1e-15):
 621            raise Exception('Error is 0.0')
 622        labels = self.e_names
 623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 624        fig1, ax1 = plt.subplots()
 625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 626        ax1.axis('equal')
 627        plt.draw()
 628        if save:
 629            fig1.savefig(save)
 630
 631        return dict(zip(labels, sizes))
 632
 633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 634        """Dump the Obs to a file 'name' of chosen format.
 635
 636        Parameters
 637        ----------
 638        filename : str
 639            name of the file to be saved.
 640        datatype : str
 641            Format of the exported file. Supported formats include
 642            "json.gz" and "pickle"
 643        description : str
 644            Description for output file, only relevant for json.gz format.
 645        path : str
 646            specifies a custom path for the file (default '.')
 647        """
 648        if 'path' in kwargs:
 649            file_name = kwargs.get('path') + '/' + filename
 650        else:
 651            file_name = filename
 652
 653        if datatype == "json.gz":
 654            from .input.json import dump_to_json
 655            dump_to_json([self], file_name, description=description)
 656        elif datatype == "pickle":
 657            with open(file_name + '.p', 'wb') as fb:
 658                pickle.dump(self, fb)
 659        else:
 660            raise Exception("Unknown datatype " + str(datatype))
 661
 662    def export_jackknife(self):
 663        """Export jackknife samples from the Obs
 664
 665        Returns
 666        -------
 667        numpy.ndarray
 668            Returns a numpy array of length N + 1 where N is the number of samples
 669            for the given ensemble and replicum. The zeroth entry of the array contains
 670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 671            derived from the Obs. The current implementation only works for observables
 672            defined on exactly one ensemble and replicum. The derived jackknife samples
 673            should agree with samples from a full jackknife analysis up to O(1/N).
 674        """
 675
 676        if len(self.names) != 1:
 677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 678
 679        name = self.names[0]
 680        full_data = self.deltas[name] + self.r_values[name]
 681        n = full_data.size
 682        mean = self.value
 683        tmp_jacks = np.zeros(n + 1)
 684        tmp_jacks[0] = mean
 685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 686        return tmp_jacks
 687
 688    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 689        """Export bootstrap samples from the Obs
 690
 691        Parameters
 692        ----------
 693        samples : int
 694            Number of bootstrap samples to generate.
 695        random_numbers : np.ndarray
 696            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 697            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 698        save_rng : str
 699            Save the random numbers to a file if a path is specified.
 700
 701        Returns
 702        -------
 703        numpy.ndarray
 704            Returns a numpy array of length N + 1 where N is the number of samples
 705            for the given ensemble and replicum. The zeroth entry of the array contains
 706            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 707            derived from the Obs. The current implementation only works for observables
 708            defined on exactly one ensemble and replicum. The derived bootstrap samples
 709            should agree with samples from a full bootstrap analysis up to O(1/N).
 710        """
 711        if len(self.names) != 1:
 712            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 713
 714        name = self.names[0]
 715        length = self.N
 716
 717        if random_numbers is None:
 718            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 719            rng = np.random.default_rng(seed)
 720            random_numbers = rng.integers(0, length, size=(samples, length))
 721
 722        if save_rng is not None:
 723            np.savetxt(save_rng, random_numbers, fmt='%i')
 724
 725        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 726        ret = np.zeros(samples + 1)
 727        ret[0] = self.value
 728        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 729        return ret
 730
 731    def __float__(self):
 732        return float(self.value)
 733
 734    def __repr__(self):
 735        return 'Obs[' + str(self) + ']'
 736
 737    def __str__(self):
 738        return _format_uncertainty(self.value, self._dvalue)
 739
 740    def __format__(self, format_type):
 741        if format_type == "":
 742            significance = 2
 743        else:
 744            significance = int(float(format_type.replace("+", "").replace("-", "")))
 745        my_str = _format_uncertainty(self.value, self._dvalue,
 746                                     significance=significance)
 747        for char in ["+", " "]:
 748            if format_type.startswith(char):
 749                if my_str[0] != "-":
 750                    my_str = char + my_str
 751        return my_str
 752
 753    def __hash__(self):
 754        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 755        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 756        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 757        hash_tuple += tuple([o.encode() for o in self.names])
 758        m = hashlib.md5()
 759        [m.update(o) for o in hash_tuple]
 760        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 761
 762    # Overload comparisons
 763    def __lt__(self, other):
 764        return self.value < other
 765
 766    def __le__(self, other):
 767        return self.value <= other
 768
 769    def __gt__(self, other):
 770        return self.value > other
 771
 772    def __ge__(self, other):
 773        return self.value >= other
 774
 775    def __eq__(self, other):
 776        return (self - other).is_zero()
 777
 778    def __ne__(self, other):
 779        return not (self - other).is_zero()
 780
 781    # Overload math operations
 782    def __add__(self, y):
 783        if isinstance(y, Obs):
 784            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 785        else:
 786            if isinstance(y, np.ndarray):
 787                return np.array([self + o for o in y])
 788            elif y.__class__.__name__ in ['Corr', 'CObs']:
 789                return NotImplemented
 790            else:
 791                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 792
 793    def __radd__(self, y):
 794        return self + y
 795
 796    def __mul__(self, y):
 797        if isinstance(y, Obs):
 798            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 799        else:
 800            if isinstance(y, np.ndarray):
 801                return np.array([self * o for o in y])
 802            elif isinstance(y, complex):
 803                return CObs(self * y.real, self * y.imag)
 804            elif y.__class__.__name__ in ['Corr', 'CObs']:
 805                return NotImplemented
 806            else:
 807                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 808
 809    def __rmul__(self, y):
 810        return self * y
 811
 812    def __sub__(self, y):
 813        if isinstance(y, Obs):
 814            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 815        else:
 816            if isinstance(y, np.ndarray):
 817                return np.array([self - o for o in y])
 818            elif y.__class__.__name__ in ['Corr', 'CObs']:
 819                return NotImplemented
 820            else:
 821                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 822
 823    def __rsub__(self, y):
 824        return -1 * (self - y)
 825
 826    def __pos__(self):
 827        return self
 828
 829    def __neg__(self):
 830        return -1 * self
 831
 832    def __truediv__(self, y):
 833        if isinstance(y, Obs):
 834            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 835        else:
 836            if isinstance(y, np.ndarray):
 837                return np.array([self / o for o in y])
 838            elif y.__class__.__name__ in ['Corr', 'CObs']:
 839                return NotImplemented
 840            else:
 841                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 842
 843    def __rtruediv__(self, y):
 844        if isinstance(y, Obs):
 845            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 846        else:
 847            if isinstance(y, np.ndarray):
 848                return np.array([o / self for o in y])
 849            elif y.__class__.__name__ in ['Corr', 'CObs']:
 850                return NotImplemented
 851            else:
 852                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 853
 854    def __pow__(self, y):
 855        if isinstance(y, Obs):
 856            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 857        else:
 858            return derived_observable(lambda x: x[0] ** y, [self])
 859
 860    def __rpow__(self, y):
 861        if isinstance(y, Obs):
 862            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 863        else:
 864            return derived_observable(lambda x: y ** x[0], [self])
 865
 866    def __abs__(self):
 867        return derived_observable(lambda x: anp.abs(x[0]), [self])
 868
 869    # Overload numpy functions
 870    def sqrt(self):
 871        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 872
 873    def log(self):
 874        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 875
 876    def exp(self):
 877        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 878
 879    def sin(self):
 880        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 881
 882    def cos(self):
 883        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 884
 885    def tan(self):
 886        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 887
 888    def arcsin(self):
 889        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 890
 891    def arccos(self):
 892        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 893
 894    def arctan(self):
 895        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 896
 897    def sinh(self):
 898        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 899
 900    def cosh(self):
 901        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 902
 903    def tanh(self):
 904        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 905
 906    def arcsinh(self):
 907        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 908
 909    def arccosh(self):
 910        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 911
 912    def arctanh(self):
 913        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 914
 915
 916class CObs:
 917    """Class for a complex valued observable."""
 918    __slots__ = ['_real', '_imag', 'tag']
 919
 920    def __init__(self, real, imag=0.0):
 921        self._real = real
 922        self._imag = imag
 923        self.tag = None
 924
 925    @property
 926    def real(self):
 927        return self._real
 928
 929    @property
 930    def imag(self):
 931        return self._imag
 932
 933    def gamma_method(self, **kwargs):
 934        """Executes the gamma_method for the real and the imaginary part."""
 935        if isinstance(self.real, Obs):
 936            self.real.gamma_method(**kwargs)
 937        if isinstance(self.imag, Obs):
 938            self.imag.gamma_method(**kwargs)
 939
 940    def is_zero(self):
 941        """Checks whether both real and imaginary part are zero within machine precision."""
 942        return self.real == 0.0 and self.imag == 0.0
 943
 944    def conjugate(self):
 945        return CObs(self.real, -self.imag)
 946
 947    def __add__(self, other):
 948        if isinstance(other, np.ndarray):
 949            return other + self
 950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 951            return CObs(self.real + other.real,
 952                        self.imag + other.imag)
 953        else:
 954            return CObs(self.real + other, self.imag)
 955
 956    def __radd__(self, y):
 957        return self + y
 958
 959    def __sub__(self, other):
 960        if isinstance(other, np.ndarray):
 961            return -1 * (other - self)
 962        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 963            return CObs(self.real - other.real, self.imag - other.imag)
 964        else:
 965            return CObs(self.real - other, self.imag)
 966
 967    def __rsub__(self, other):
 968        return -1 * (self - other)
 969
 970    def __mul__(self, other):
 971        if isinstance(other, np.ndarray):
 972            return other * self
 973        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 974            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 975                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 976                                               [self.real, other.real, self.imag, other.imag],
 977                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 978                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 981            elif getattr(other, 'imag', 0) != 0:
 982                return CObs(self.real * other.real - self.imag * other.imag,
 983                            self.imag * other.real + self.real * other.imag)
 984            else:
 985                return CObs(self.real * other.real, self.imag * other.real)
 986        else:
 987            return CObs(self.real * other, self.imag * other)
 988
 989    def __rmul__(self, other):
 990        return self * other
 991
 992    def __truediv__(self, other):
 993        if isinstance(other, np.ndarray):
 994            return 1 / (other / self)
 995        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 996            r = other.real ** 2 + other.imag ** 2
 997            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 998        else:
 999            return CObs(self.real / other, self.imag / other)
1000
1001    def __rtruediv__(self, other):
1002        r = self.real ** 2 + self.imag ** 2
1003        if hasattr(other, 'real') and hasattr(other, 'imag'):
1004            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1005        else:
1006            return CObs(self.real * other / r, -self.imag * other / r)
1007
1008    def __abs__(self):
1009        return np.sqrt(self.real**2 + self.imag**2)
1010
1011    def __pos__(self):
1012        return self
1013
1014    def __neg__(self):
1015        return -1 * self
1016
1017    def __eq__(self, other):
1018        return self.real == other.real and self.imag == other.imag
1019
1020    def __str__(self):
1021        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1022
1023    def __repr__(self):
1024        return 'CObs[' + str(self) + ']'
1025
1026    def __format__(self, format_type):
1027        if format_type == "":
1028            significance = 2
1029            format_type = "2"
1030        else:
1031            significance = int(float(format_type.replace("+", "").replace("-", "")))
1032        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1033
1034
1035def gamma_method(x, **kwargs):
1036    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1037
1038    See docstring of pe.Obs.gamma_method for details.
1039    """
1040    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1041
1042
1043def gm(x, **kwargs):
1044    """Short version of the vectorized gamma_method.
1045
1046    See docstring of pe.Obs.gamma_method for details
1047    """
1048    return gamma_method(x, **kwargs)
1049
1050
1051def _format_uncertainty(value, dvalue, significance=2):
1052    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1053    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1054        return str(value)
1055    if not isinstance(significance, int):
1056        raise TypeError("significance needs to be an integer.")
1057    if significance < 1:
1058        raise ValueError("significance needs to be larger than zero.")
1059    fexp = np.floor(np.log10(dvalue))
1060    if fexp < 0.0:
1061        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1062    elif fexp == 0.0:
1063        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1064    else:
1065        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1066
1067
1068def _expand_deltas(deltas, idx, shape, gapsize):
1069    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1070       configurations and where holes are filled by 0.
1071       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1072
1073    Parameters
1074    ----------
1075    deltas : list
1076        List of fluctuations
1077    idx : list
1078        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1079    shape : int
1080        Number of configs in idx.
1081    gapsize : int
1082        The target distance between two configurations. If longer distances
1083        are found in idx, the data is expanded.
1084    """
1085    if isinstance(idx, range):
1086        if (idx.step == gapsize):
1087            return deltas
1088    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1089    for i in range(shape):
1090        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1091    return ret
1092
1093
1094def _merge_idx(idl):
1095    """Returns the union of all lists in idl as range or sorted list
1096
1097    Parameters
1098    ----------
1099    idl : list
1100        List of lists or ranges.
1101    """
1102
1103    if _check_lists_equal(idl):
1104        return idl[0]
1105
1106    idunion = sorted(set().union(*idl))
1107
1108    # Check whether idunion can be expressed as range
1109    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1110    idtest = [list(idrange), idunion]
1111    if _check_lists_equal(idtest):
1112        return idrange
1113
1114    return idunion
1115
1116
1117def _intersection_idx(idl):
1118    """Returns the intersection of all lists in idl as range or sorted list
1119
1120    Parameters
1121    ----------
1122    idl : list
1123        List of lists or ranges.
1124    """
1125
1126    if _check_lists_equal(idl):
1127        return idl[0]
1128
1129    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1130
1131    # Check whether idinter can be expressed as range
1132    try:
1133        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1134        idtest = [list(idrange), idinter]
1135        if _check_lists_equal(idtest):
1136            return idrange
1137    except IndexError:
1138        pass
1139
1140    return idinter
1141
1142
1143def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1144    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1145       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1146       common divisor of the step sizes is used as new step size.
1147
1148    Parameters
1149    ----------
1150    deltas : list
1151        List of fluctuations
1152    idx : list
1153        List or range of configs on which the deltas are defined.
1154        Has to be a subset of new_idx and has to be sorted in ascending order.
1155    shape : list
1156        Number of configs in idx.
1157    new_idx : list
1158        List of configs that defines the new range, has to be sorted in ascending order.
1159    """
1160
1161    if type(idx) is range and type(new_idx) is range:
1162        if idx == new_idx:
1163            return deltas
1164    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1165    for i in range(shape):
1166        ret[idx[i] - new_idx[0]] = deltas[i]
1167    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1168
1169
1170def derived_observable(func, data, array_mode=False, **kwargs):
1171    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1172
1173    Parameters
1174    ----------
1175    func : object
1176        arbitrary function of the form func(data, **kwargs). For the
1177        automatic differentiation to work, all numpy functions have to have
1178        the autograd wrapper (use 'import autograd.numpy as anp').
1179    data : list
1180        list of Obs, e.g. [obs1, obs2, obs3].
1181    num_grad : bool
1182        if True, numerical derivatives are used instead of autograd
1183        (default False). To control the numerical differentiation the
1184        kwargs of numdifftools.step_generators.MaxStepGenerator
1185        can be used.
1186    man_grad : list
1187        manually supply a list or an array which contains the jacobian
1188        of func. Use cautiously, supplying the wrong derivative will
1189        not be intercepted.
1190
1191    Notes
1192    -----
1193    For simple mathematical operations it can be practical to use anonymous
1194    functions. For the ratio of two observables one can e.g. use
1195
1196    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1197    """
1198
1199    data = np.asarray(data)
1200    raveled_data = data.ravel()
1201
1202    # Workaround for matrix operations containing non Obs data
1203    if not all(isinstance(x, Obs) for x in raveled_data):
1204        for i in range(len(raveled_data)):
1205            if isinstance(raveled_data[i], (int, float)):
1206                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1207
1208    allcov = {}
1209    for o in raveled_data:
1210        for name in o.cov_names:
1211            if name in allcov:
1212                if not np.allclose(allcov[name], o.covobs[name].cov):
1213                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1214            else:
1215                allcov[name] = o.covobs[name].cov
1216
1217    n_obs = len(raveled_data)
1218    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1219    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1220    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1221
1222    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1223
1224    if data.ndim == 1:
1225        values = np.array([o.value for o in data])
1226    else:
1227        values = np.vectorize(lambda x: x.value)(data)
1228
1229    new_values = func(values, **kwargs)
1230
1231    multi = int(isinstance(new_values, np.ndarray))
1232
1233    new_r_values = {}
1234    new_idl_d = {}
1235    for name in new_sample_names:
1236        idl = []
1237        tmp_values = np.zeros(n_obs)
1238        for i, item in enumerate(raveled_data):
1239            tmp_values[i] = item.r_values.get(name, item.value)
1240            tmp_idl = item.idl.get(name)
1241            if tmp_idl is not None:
1242                idl.append(tmp_idl)
1243        if multi > 0:
1244            tmp_values = np.array(tmp_values).reshape(data.shape)
1245        new_r_values[name] = func(tmp_values, **kwargs)
1246        new_idl_d[name] = _merge_idx(idl)
1247
1248    if 'man_grad' in kwargs:
1249        deriv = np.asarray(kwargs.get('man_grad'))
1250        if new_values.shape + data.shape != deriv.shape:
1251            raise Exception('Manual derivative does not have correct shape.')
1252    elif kwargs.get('num_grad') is True:
1253        if multi > 0:
1254            raise Exception('Multi mode currently not supported for numerical derivative')
1255        options = {
1256            'base_step': 0.1,
1257            'step_ratio': 2.5}
1258        for key in options.keys():
1259            kwarg = kwargs.get(key)
1260            if kwarg is not None:
1261                options[key] = kwarg
1262        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1263        if tmp_df.size == 1:
1264            deriv = np.array([tmp_df.real])
1265        else:
1266            deriv = tmp_df.real
1267    else:
1268        deriv = jacobian(func)(values, **kwargs)
1269
1270    final_result = np.zeros(new_values.shape, dtype=object)
1271
1272    if array_mode is True:
1273
1274        class _Zero_grad():
1275            def __init__(self, N):
1276                self.grad = np.zeros((N, 1))
1277
1278        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1279        d_extracted = {}
1280        g_extracted = {}
1281        for name in new_sample_names:
1282            d_extracted[name] = []
1283            ens_length = len(new_idl_d[name])
1284            for i_dat, dat in enumerate(data):
1285                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1286        for name in new_cov_names:
1287            g_extracted[name] = []
1288            zero_grad = _Zero_grad(new_covobs_lengths[name])
1289            for i_dat, dat in enumerate(data):
1290                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1291
1292    for i_val, new_val in np.ndenumerate(new_values):
1293        new_deltas = {}
1294        new_grad = {}
1295        if array_mode is True:
1296            for name in new_sample_names:
1297                ens_length = d_extracted[name][0].shape[-1]
1298                new_deltas[name] = np.zeros(ens_length)
1299                for i_dat, dat in enumerate(d_extracted[name]):
1300                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1301            for name in new_cov_names:
1302                new_grad[name] = 0
1303                for i_dat, dat in enumerate(g_extracted[name]):
1304                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1305        else:
1306            for j_obs, obs in np.ndenumerate(data):
1307                for name in obs.names:
1308                    if name in obs.cov_names:
1309                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1310                    else:
1311                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1312
1313        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1314
1315        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1316            raise Exception('The same name has been used for deltas and covobs!')
1317        new_samples = []
1318        new_means = []
1319        new_idl = []
1320        new_names_obs = []
1321        for name in new_names:
1322            if name not in new_covobs:
1323                new_samples.append(new_deltas[name])
1324                new_idl.append(new_idl_d[name])
1325                new_means.append(new_r_values[name][i_val])
1326                new_names_obs.append(name)
1327        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1328        for name in new_covobs:
1329            final_result[i_val].names.append(name)
1330        final_result[i_val]._covobs = new_covobs
1331        final_result[i_val]._value = new_val
1332        final_result[i_val].reweighted = reweighted
1333
1334    if multi == 0:
1335        final_result = final_result.item()
1336
1337    return final_result
1338
1339
1340def _reduce_deltas(deltas, idx_old, idx_new):
1341    """Extract deltas defined on idx_old on all configs of idx_new.
1342
1343    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1344    are ordered in an ascending order.
1345
1346    Parameters
1347    ----------
1348    deltas : list
1349        List of fluctuations
1350    idx_old : list
1351        List or range of configs on which the deltas are defined
1352    idx_new : list
1353        List of configs for which we want to extract the deltas.
1354        Has to be a subset of idx_old.
1355    """
1356    if not len(deltas) == len(idx_old):
1357        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1358    if type(idx_old) is range and type(idx_new) is range:
1359        if idx_old == idx_new:
1360            return deltas
1361    if _check_lists_equal([idx_old, idx_new]):
1362        return deltas
1363    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1364    if len(indices) < len(idx_new):
1365        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1366    return np.array(deltas)[indices]
1367
1368
1369def reweight(weight, obs, **kwargs):
1370    """Reweight a list of observables.
1371
1372    Parameters
1373    ----------
1374    weight : Obs
1375        Reweighting factor. An Observable that has to be defined on a superset of the
1376        configurations in obs[i].idl for all i.
1377    obs : list
1378        list of Obs, e.g. [obs1, obs2, obs3].
1379    all_configs : bool
1380        if True, the reweighted observables are normalized by the average of
1381        the reweighting factor on all configurations in weight.idl and not
1382        on the configurations in obs[i].idl. Default False.
1383    """
1384    result = []
1385    for i in range(len(obs)):
1386        if len(obs[i].cov_names):
1387            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1388        if not set(obs[i].names).issubset(weight.names):
1389            raise Exception('Error: Ensembles do not fit')
1390        for name in obs[i].names:
1391            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1392                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1393        new_samples = []
1394        w_deltas = {}
1395        for name in sorted(obs[i].names):
1396            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1397            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1398        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1399
1400        if kwargs.get('all_configs'):
1401            new_weight = weight
1402        else:
1403            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1404
1405        result.append(tmp_obs / new_weight)
1406        result[-1].reweighted = True
1407
1408    return result
1409
1410
1411def correlate(obs_a, obs_b):
1412    """Correlate two observables.
1413
1414    Parameters
1415    ----------
1416    obs_a : Obs
1417        First observable
1418    obs_b : Obs
1419        Second observable
1420
1421    Notes
1422    -----
1423    Keep in mind to only correlate primary observables which have not been reweighted
1424    yet. The reweighting has to be applied after correlating the observables.
1425    Currently only works if ensembles are identical (this is not strictly necessary).
1426    """
1427
1428    if sorted(obs_a.names) != sorted(obs_b.names):
1429        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1430    if len(obs_a.cov_names) or len(obs_b.cov_names):
1431        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1432    for name in obs_a.names:
1433        if obs_a.shape[name] != obs_b.shape[name]:
1434            raise Exception('Shapes of ensemble', name, 'do not fit')
1435        if obs_a.idl[name] != obs_b.idl[name]:
1436            raise Exception('idl of ensemble', name, 'do not fit')
1437
1438    if obs_a.reweighted is True:
1439        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1440    if obs_b.reweighted is True:
1441        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1442
1443    new_samples = []
1444    new_idl = []
1445    for name in sorted(obs_a.names):
1446        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1447        new_idl.append(obs_a.idl[name])
1448
1449    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1450    o.reweighted = obs_a.reweighted or obs_b.reweighted
1451    return o
1452
1453
1454def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1455    r'''Calculates the error covariance matrix of a set of observables.
1456
1457    WARNING: This function should be used with care, especially for observables with support on multiple
1458             ensembles with differing autocorrelations. See the notes below for details.
1459
1460    The gamma method has to be applied first to all observables.
1461
1462    Parameters
1463    ----------
1464    obs : list or numpy.ndarray
1465        List or one dimensional array of Obs
1466    visualize : bool
1467        If True plots the corresponding normalized correlation matrix (default False).
1468    correlation : bool
1469        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1470    smooth : None or int
1471        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1472        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1473        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1474        small ones.
1475
1476    Notes
1477    -----
1478    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1479    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1480    in the absence of autocorrelation.
1481    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1482    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1483    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1484    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1485    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1486    '''
1487
1488    length = len(obs)
1489
1490    max_samples = np.max([o.N for o in obs])
1491    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1492        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1493
1494    cov = np.zeros((length, length))
1495    for i in range(length):
1496        for j in range(i, length):
1497            cov[i, j] = _covariance_element(obs[i], obs[j])
1498    cov = cov + cov.T - np.diag(np.diag(cov))
1499
1500    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1501
1502    if isinstance(smooth, int):
1503        corr = _smooth_eigenvalues(corr, smooth)
1504
1505    if visualize:
1506        plt.matshow(corr, vmin=-1, vmax=1)
1507        plt.set_cmap('RdBu')
1508        plt.colorbar()
1509        plt.draw()
1510
1511    if correlation is True:
1512        return corr
1513
1514    errors = [o.dvalue for o in obs]
1515    cov = np.diag(errors) @ corr @ np.diag(errors)
1516
1517    eigenvalues = np.linalg.eigh(cov)[0]
1518    if not np.all(eigenvalues >= 0):
1519        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1520
1521    return cov
1522
1523
1524def _smooth_eigenvalues(corr, E):
1525    """Eigenvalue smoothing as described in hep-lat/9412087
1526
1527    corr : np.ndarray
1528        correlation matrix
1529    E : integer
1530        Number of eigenvalues to be left substantially unchanged
1531    """
1532    if not (2 < E < corr.shape[0] - 1):
1533        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1534    vals, vec = np.linalg.eigh(corr)
1535    lambda_min = np.mean(vals[:-E])
1536    vals[vals < lambda_min] = lambda_min
1537    vals /= np.mean(vals)
1538    return vec @ np.diag(vals) @ vec.T
1539
1540
1541def _covariance_element(obs1, obs2):
1542    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1543
1544    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1545        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1546        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1547        return np.sum(deltas1 * deltas2)
1548
1549    if set(obs1.names).isdisjoint(set(obs2.names)):
1550        return 0.0
1551
1552    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1553        raise Exception('The gamma method has to be applied to both Obs first.')
1554
1555    dvalue = 0.0
1556
1557    for e_name in obs1.mc_names:
1558
1559        if e_name not in obs2.mc_names:
1560            continue
1561
1562        idl_d = {}
1563        for r_name in obs1.e_content[e_name]:
1564            if r_name not in obs2.e_content[e_name]:
1565                continue
1566            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1567
1568        gamma = 0.0
1569
1570        for r_name in obs1.e_content[e_name]:
1571            if r_name not in obs2.e_content[e_name]:
1572                continue
1573            if len(idl_d[r_name]) == 0:
1574                continue
1575            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1576
1577        if gamma == 0.0:
1578            continue
1579
1580        gamma_div = 0.0
1581        for r_name in obs1.e_content[e_name]:
1582            if r_name not in obs2.e_content[e_name]:
1583                continue
1584            if len(idl_d[r_name]) == 0:
1585                continue
1586            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1587        gamma /= gamma_div
1588
1589        dvalue += gamma
1590
1591    for e_name in obs1.cov_names:
1592
1593        if e_name not in obs2.cov_names:
1594            continue
1595
1596        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1597
1598    return dvalue
1599
1600
1601def import_jackknife(jacks, name, idl=None):
1602    """Imports jackknife samples and returns an Obs
1603
1604    Parameters
1605    ----------
1606    jacks : numpy.ndarray
1607        numpy array containing the mean value as zeroth entry and
1608        the N jackknife samples as first to Nth entry.
1609    name : str
1610        name of the ensemble the samples are defined on.
1611    """
1612    length = len(jacks) - 1
1613    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1614    samples = jacks[1:] @ prj
1615    mean = np.mean(samples)
1616    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1617    new_obs._value = jacks[0]
1618    return new_obs
1619
1620
1621def import_bootstrap(boots, name, random_numbers):
1622    """Imports bootstrap samples and returns an Obs
1623
1624    Parameters
1625    ----------
1626    boots : numpy.ndarray
1627        numpy array containing the mean value as zeroth entry and
1628        the N bootstrap samples as first to Nth entry.
1629    name : str
1630        name of the ensemble the samples are defined on.
1631    random_numbers : np.ndarray
1632        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1633        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1634        chain to be reconstructed.
1635    """
1636    samples, length = random_numbers.shape
1637    if samples != len(boots) - 1:
1638        raise ValueError("Random numbers do not have the correct shape.")
1639
1640    if samples < length:
1641        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1642
1643    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1644
1645    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1646    ret = Obs([samples], [name])
1647    ret._value = boots[0]
1648    return ret
1649
1650
1651def merge_obs(list_of_obs):
1652    """Combine all observables in list_of_obs into one new observable
1653
1654    Parameters
1655    ----------
1656    list_of_obs : list
1657        list of the Obs object to be combined
1658
1659    Notes
1660    -----
1661    It is not possible to combine obs which are based on the same replicum
1662    """
1663    replist = [item for obs in list_of_obs for item in obs.names]
1664    if (len(replist) == len(set(replist))) is False:
1665        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1666    if any([len(o.cov_names) for o in list_of_obs]):
1667        raise Exception('Not possible to merge data that contains covobs!')
1668    new_dict = {}
1669    idl_dict = {}
1670    for o in list_of_obs:
1671        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1672                        for key in set(o.deltas) | set(o.r_values)})
1673        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1674
1675    names = sorted(new_dict.keys())
1676    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1677    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1678    return o
1679
1680
1681def cov_Obs(means, cov, name, grad=None):
1682    """Create an Obs based on mean(s) and a covariance matrix
1683
1684    Parameters
1685    ----------
1686    mean : list of floats or float
1687        N mean value(s) of the new Obs
1688    cov : list or array
1689        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1690    name : str
1691        identifier for the covariance matrix
1692    grad : list or array
1693        Gradient of the Covobs wrt. the means belonging to cov.
1694    """
1695
1696    def covobs_to_obs(co):
1697        """Make an Obs out of a Covobs
1698
1699        Parameters
1700        ----------
1701        co : Covobs
1702            Covobs to be embedded into the Obs
1703        """
1704        o = Obs([], [], means=[])
1705        o._value = co.value
1706        o.names.append(co.name)
1707        o._covobs[co.name] = co
1708        o._dvalue = np.sqrt(co.errsq())
1709        return o
1710
1711    ol = []
1712    if isinstance(means, (float, int)):
1713        means = [means]
1714
1715    for i in range(len(means)):
1716        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1717    if ol[0].covobs[name].N != len(means):
1718        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1719    if len(ol) == 1:
1720        return ol[0]
1721    return ol
1722
1723
1724def _determine_gap(o, e_content, e_name):
1725    gaps = []
1726    for r_name in e_content[e_name]:
1727        if isinstance(o.idl[r_name], range):
1728            gaps.append(o.idl[r_name].step)
1729        else:
1730            gaps.append(np.min(np.diff(o.idl[r_name])))
1731
1732    gap = min(gaps)
1733    if not np.all([gi % gap == 0 for gi in gaps]):
1734        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1735
1736    return gap
1737
1738
1739def _check_lists_equal(idl):
1740    '''
1741    Use groupby to efficiently check whether all elements of idl are identical.
1742    Returns True if all elements are equal, otherwise False.
1743
1744    Parameters
1745    ----------
1746    idl : list of lists, ranges or np.ndarrays
1747    '''
1748    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1749    if next(g, True) and not next(g, False):
1750        return True
1751    return False

class CObs: View Source

 917class CObs:
 918    """Class for a complex valued observable."""
 919    __slots__ = ['_real', '_imag', 'tag']
 920
 921    def __init__(self, real, imag=0.0):
 922        self._real = real
 923        self._imag = imag
 924        self.tag = None
 925
 926    @property
 927    def real(self):
 928        return self._real
 929
 930    @property
 931    def imag(self):
 932        return self._imag
 933
 934    def gamma_method(self, **kwargs):
 935        """Executes the gamma_method for the real and the imaginary part."""
 936        if isinstance(self.real, Obs):
 937            self.real.gamma_method(**kwargs)
 938        if isinstance(self.imag, Obs):
 939            self.imag.gamma_method(**kwargs)
 940
 941    def is_zero(self):
 942        """Checks whether both real and imaginary part are zero within machine precision."""
 943        return self.real == 0.0 and self.imag == 0.0
 944
 945    def conjugate(self):
 946        return CObs(self.real, -self.imag)
 947
 948    def __add__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return other + self
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            return CObs(self.real + other.real,
 953                        self.imag + other.imag)
 954        else:
 955            return CObs(self.real + other, self.imag)
 956
 957    def __radd__(self, y):
 958        return self + y
 959
 960    def __sub__(self, other):
 961        if isinstance(other, np.ndarray):
 962            return -1 * (other - self)
 963        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 964            return CObs(self.real - other.real, self.imag - other.imag)
 965        else:
 966            return CObs(self.real - other, self.imag)
 967
 968    def __rsub__(self, other):
 969        return -1 * (self - other)
 970
 971    def __mul__(self, other):
 972        if isinstance(other, np.ndarray):
 973            return other * self
 974        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 975            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 976                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 977                                               [self.real, other.real, self.imag, other.imag],
 978                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 979                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 980                                               [self.real, other.real, self.imag, other.imag],
 981                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 982            elif getattr(other, 'imag', 0) != 0:
 983                return CObs(self.real * other.real - self.imag * other.imag,
 984                            self.imag * other.real + self.real * other.imag)
 985            else:
 986                return CObs(self.real * other.real, self.imag * other.real)
 987        else:
 988            return CObs(self.real * other, self.imag * other)
 989
 990    def __rmul__(self, other):
 991        return self * other
 992
 993    def __truediv__(self, other):
 994        if isinstance(other, np.ndarray):
 995            return 1 / (other / self)
 996        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 997            r = other.real ** 2 + other.imag ** 2
 998            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 999        else:
1000            return CObs(self.real / other, self.imag / other)
1001
1002    def __rtruediv__(self, other):
1003        r = self.real ** 2 + self.imag ** 2
1004        if hasattr(other, 'real') and hasattr(other, 'imag'):
1005            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1006        else:
1007            return CObs(self.real * other / r, -self.imag * other / r)
1008
1009    def __abs__(self):
1010        return np.sqrt(self.real**2 + self.imag**2)
1011
1012    def __pos__(self):
1013        return self
1014
1015    def __neg__(self):
1016        return -1 * self
1017
1018    def __eq__(self, other):
1019        return self.real == other.real and self.imag == other.imag
1020
1021    def __str__(self):
1022        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1023
1024    def __repr__(self):
1025        return 'CObs[' + str(self) + ']'
1026
1027    def __format__(self, format_type):
1028        if format_type == "":
1029            significance = 2
1030            format_type = "2"
1031        else:
1032            significance = int(float(format_type.replace("+", "").replace("-", "")))
1033        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

921    def __init__(self, real, imag=0.0):
922        self._real = real
923        self._imag = imag
924        self.tag = None

tag

real

imag

def gamma_method(self, **kwargs): View Source

934    def gamma_method(self, **kwargs):
935        """Executes the gamma_method for the real and the imaginary part."""
936        if isinstance(self.real, Obs):
937            self.real.gamma_method(**kwargs)
938        if isinstance(self.imag, Obs):
939            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

941    def is_zero(self):
942        """Checks whether both real and imaginary part are zero within machine precision."""
943        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

945    def conjugate(self):
946        return CObs(self.real, -self.imag)

def gamma_method(x, **kwargs): View Source

1036def gamma_method(x, **kwargs):
1037    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1038
1039    See docstring of pe.Obs.gamma_method for details.
1040    """
1041    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs): View Source

1044def gm(x, **kwargs):
1045    """Short version of the vectorized gamma_method.
1046
1047    See docstring of pe.Obs.gamma_method for details
1048    """
1049    return gamma_method(x, **kwargs)

Short version of the vectorized gamma_method.

See docstring of pe.Obs.gamma_method for details

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1171def derived_observable(func, data, array_mode=False, **kwargs):
1172    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1173
1174    Parameters
1175    ----------
1176    func : object
1177        arbitrary function of the form func(data, **kwargs). For the
1178        automatic differentiation to work, all numpy functions have to have
1179        the autograd wrapper (use 'import autograd.numpy as anp').
1180    data : list
1181        list of Obs, e.g. [obs1, obs2, obs3].
1182    num_grad : bool
1183        if True, numerical derivatives are used instead of autograd
1184        (default False). To control the numerical differentiation the
1185        kwargs of numdifftools.step_generators.MaxStepGenerator
1186        can be used.
1187    man_grad : list
1188        manually supply a list or an array which contains the jacobian
1189        of func. Use cautiously, supplying the wrong derivative will
1190        not be intercepted.
1191
1192    Notes
1193    -----
1194    For simple mathematical operations it can be practical to use anonymous
1195    functions. For the ratio of two observables one can e.g. use
1196
1197    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1198    """
1199
1200    data = np.asarray(data)
1201    raveled_data = data.ravel()
1202
1203    # Workaround for matrix operations containing non Obs data
1204    if not all(isinstance(x, Obs) for x in raveled_data):
1205        for i in range(len(raveled_data)):
1206            if isinstance(raveled_data[i], (int, float)):
1207                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1208
1209    allcov = {}
1210    for o in raveled_data:
1211        for name in o.cov_names:
1212            if name in allcov:
1213                if not np.allclose(allcov[name], o.covobs[name].cov):
1214                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1215            else:
1216                allcov[name] = o.covobs[name].cov
1217
1218    n_obs = len(raveled_data)
1219    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1220    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1221    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1222
1223    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1224
1225    if data.ndim == 1:
1226        values = np.array([o.value for o in data])
1227    else:
1228        values = np.vectorize(lambda x: x.value)(data)
1229
1230    new_values = func(values, **kwargs)
1231
1232    multi = int(isinstance(new_values, np.ndarray))
1233
1234    new_r_values = {}
1235    new_idl_d = {}
1236    for name in new_sample_names:
1237        idl = []
1238        tmp_values = np.zeros(n_obs)
1239        for i, item in enumerate(raveled_data):
1240            tmp_values[i] = item.r_values.get(name, item.value)
1241            tmp_idl = item.idl.get(name)
1242            if tmp_idl is not None:
1243                idl.append(tmp_idl)
1244        if multi > 0:
1245            tmp_values = np.array(tmp_values).reshape(data.shape)
1246        new_r_values[name] = func(tmp_values, **kwargs)
1247        new_idl_d[name] = _merge_idx(idl)
1248
1249    if 'man_grad' in kwargs:
1250        deriv = np.asarray(kwargs.get('man_grad'))
1251        if new_values.shape + data.shape != deriv.shape:
1252            raise Exception('Manual derivative does not have correct shape.')
1253    elif kwargs.get('num_grad') is True:
1254        if multi > 0:
1255            raise Exception('Multi mode currently not supported for numerical derivative')
1256        options = {
1257            'base_step': 0.1,
1258            'step_ratio': 2.5}
1259        for key in options.keys():
1260            kwarg = kwargs.get(key)
1261            if kwarg is not None:
1262                options[key] = kwarg
1263        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1264        if tmp_df.size == 1:
1265            deriv = np.array([tmp_df.real])
1266        else:
1267            deriv = tmp_df.real
1268    else:
1269        deriv = jacobian(func)(values, **kwargs)
1270
1271    final_result = np.zeros(new_values.shape, dtype=object)
1272
1273    if array_mode is True:
1274
1275        class _Zero_grad():
1276            def __init__(self, N):
1277                self.grad = np.zeros((N, 1))
1278
1279        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1280        d_extracted = {}
1281        g_extracted = {}
1282        for name in new_sample_names:
1283            d_extracted[name] = []
1284            ens_length = len(new_idl_d[name])
1285            for i_dat, dat in enumerate(data):
1286                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1287        for name in new_cov_names:
1288            g_extracted[name] = []
1289            zero_grad = _Zero_grad(new_covobs_lengths[name])
1290            for i_dat, dat in enumerate(data):
1291                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1292
1293    for i_val, new_val in np.ndenumerate(new_values):
1294        new_deltas = {}
1295        new_grad = {}
1296        if array_mode is True:
1297            for name in new_sample_names:
1298                ens_length = d_extracted[name][0].shape[-1]
1299                new_deltas[name] = np.zeros(ens_length)
1300                for i_dat, dat in enumerate(d_extracted[name]):
1301                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1302            for name in new_cov_names:
1303                new_grad[name] = 0
1304                for i_dat, dat in enumerate(g_extracted[name]):
1305                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1306        else:
1307            for j_obs, obs in np.ndenumerate(data):
1308                for name in obs.names:
1309                    if name in obs.cov_names:
1310                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1311                    else:
1312                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1313
1314        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1315
1316        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1317            raise Exception('The same name has been used for deltas and covobs!')
1318        new_samples = []
1319        new_means = []
1320        new_idl = []
1321        new_names_obs = []
1322        for name in new_names:
1323            if name not in new_covobs:
1324                new_samples.append(new_deltas[name])
1325                new_idl.append(new_idl_d[name])
1326                new_means.append(new_r_values[name][i_val])
1327                new_names_obs.append(name)
1328        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1329        for name in new_covobs:
1330            final_result[i_val].names.append(name)
1331        final_result[i_val]._covobs = new_covobs
1332        final_result[i_val]._value = new_val
1333        final_result[i_val].reweighted = reweighted
1334
1335    if multi == 0:
1336        final_result = final_result.item()
1337
1338    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

1370def reweight(weight, obs, **kwargs):
1371    """Reweight a list of observables.
1372
1373    Parameters
1374    ----------
1375    weight : Obs
1376        Reweighting factor. An Observable that has to be defined on a superset of the
1377        configurations in obs[i].idl for all i.
1378    obs : list
1379        list of Obs, e.g. [obs1, obs2, obs3].
1380    all_configs : bool
1381        if True, the reweighted observables are normalized by the average of
1382        the reweighting factor on all configurations in weight.idl and not
1383        on the configurations in obs[i].idl. Default False.
1384    """
1385    result = []
1386    for i in range(len(obs)):
1387        if len(obs[i].cov_names):
1388            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1389        if not set(obs[i].names).issubset(weight.names):
1390            raise Exception('Error: Ensembles do not fit')
1391        for name in obs[i].names:
1392            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1393                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1394        new_samples = []
1395        w_deltas = {}
1396        for name in sorted(obs[i].names):
1397            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1398            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1399        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1400
1401        if kwargs.get('all_configs'):
1402            new_weight = weight
1403        else:
1404            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1405
1406        result.append(tmp_obs / new_weight)
1407        result[-1].reweighted = True
1408
1409    return result

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

1412def correlate(obs_a, obs_b):
1413    """Correlate two observables.
1414
1415    Parameters
1416    ----------
1417    obs_a : Obs
1418        First observable
1419    obs_b : Obs
1420        Second observable
1421
1422    Notes
1423    -----
1424    Keep in mind to only correlate primary observables which have not been reweighted
1425    yet. The reweighting has to be applied after correlating the observables.
1426    Currently only works if ensembles are identical (this is not strictly necessary).
1427    """
1428
1429    if sorted(obs_a.names) != sorted(obs_b.names):
1430        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1431    if len(obs_a.cov_names) or len(obs_b.cov_names):
1432        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1433    for name in obs_a.names:
1434        if obs_a.shape[name] != obs_b.shape[name]:
1435            raise Exception('Shapes of ensemble', name, 'do not fit')
1436        if obs_a.idl[name] != obs_b.idl[name]:
1437            raise Exception('idl of ensemble', name, 'do not fit')
1438
1439    if obs_a.reweighted is True:
1440        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1441    if obs_b.reweighted is True:
1442        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1443
1444    new_samples = []
1445    new_idl = []
1446    for name in sorted(obs_a.names):
1447        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1448        new_idl.append(obs_a.idl[name])
1449
1450    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1451    o.reweighted = obs_a.reweighted or obs_b.reweighted
1452    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

1455def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1456    r'''Calculates the error covariance matrix of a set of observables.
1457
1458    WARNING: This function should be used with care, especially for observables with support on multiple
1459             ensembles with differing autocorrelations. See the notes below for details.
1460
1461    The gamma method has to be applied first to all observables.
1462
1463    Parameters
1464    ----------
1465    obs : list or numpy.ndarray
1466        List or one dimensional array of Obs
1467    visualize : bool
1468        If True plots the corresponding normalized correlation matrix (default False).
1469    correlation : bool
1470        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1471    smooth : None or int
1472        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1473        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1474        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1475        small ones.
1476
1477    Notes
1478    -----
1479    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1480    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1481    in the absence of autocorrelation.
1482    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1483    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1484    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1485    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1486    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1487    '''
1488
1489    length = len(obs)
1490
1491    max_samples = np.max([o.N for o in obs])
1492    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1493        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1494
1495    cov = np.zeros((length, length))
1496    for i in range(length):
1497        for j in range(i, length):
1498            cov[i, j] = _covariance_element(obs[i], obs[j])
1499    cov = cov + cov.T - np.diag(np.diag(cov))
1500
1501    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1502
1503    if isinstance(smooth, int):
1504        corr = _smooth_eigenvalues(corr, smooth)
1505
1506    if visualize:
1507        plt.matshow(corr, vmin=-1, vmax=1)
1508        plt.set_cmap('RdBu')
1509        plt.colorbar()
1510        plt.draw()
1511
1512    if correlation is True:
1513        return corr
1514
1515    errors = [o.dvalue for o in obs]
1516    cov = np.diag(errors) @ corr @ np.diag(errors)
1517
1518    eigenvalues = np.linalg.eigh(cov)[0]
1519    if not np.all(eigenvalues >= 0):
1520        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1521
1522    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None): View Source

1602def import_jackknife(jacks, name, idl=None):
1603    """Imports jackknife samples and returns an Obs
1604
1605    Parameters
1606    ----------
1607    jacks : numpy.ndarray
1608        numpy array containing the mean value as zeroth entry and
1609        the N jackknife samples as first to Nth entry.
1610    name : str
1611        name of the ensemble the samples are defined on.
1612    """
1613    length = len(jacks) - 1
1614    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1615    samples = jacks[1:] @ prj
1616    mean = np.mean(samples)
1617    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1618    new_obs._value = jacks[0]
1619    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.

def import_bootstrap(boots, name, random_numbers): View Source

1622def import_bootstrap(boots, name, random_numbers):
1623    """Imports bootstrap samples and returns an Obs
1624
1625    Parameters
1626    ----------
1627    boots : numpy.ndarray
1628        numpy array containing the mean value as zeroth entry and
1629        the N bootstrap samples as first to Nth entry.
1630    name : str
1631        name of the ensemble the samples are defined on.
1632    random_numbers : np.ndarray
1633        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1634        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1635        chain to be reconstructed.
1636    """
1637    samples, length = random_numbers.shape
1638    if samples != len(boots) - 1:
1639        raise ValueError("Random numbers do not have the correct shape.")
1640
1641    if samples < length:
1642        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1643
1644    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1645
1646    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1647    ret = Obs([samples], [name])
1648    ret._value = boots[0]
1649    return ret

Imports bootstrap samples and returns an Obs

Parameters

boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.
random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.

def merge_obs(list_of_obs): View Source

1652def merge_obs(list_of_obs):
1653    """Combine all observables in list_of_obs into one new observable
1654
1655    Parameters
1656    ----------
1657    list_of_obs : list
1658        list of the Obs object to be combined
1659
1660    Notes
1661    -----
1662    It is not possible to combine obs which are based on the same replicum
1663    """
1664    replist = [item for obs in list_of_obs for item in obs.names]
1665    if (len(replist) == len(set(replist))) is False:
1666        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1667    if any([len(o.cov_names) for o in list_of_obs]):
1668        raise Exception('Not possible to merge data that contains covobs!')
1669    new_dict = {}
1670    idl_dict = {}
1671    for o in list_of_obs:
1672        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1673                        for key in set(o.deltas) | set(o.r_values)})
1674        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1675
1676    names = sorted(new_dict.keys())
1677    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1678    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1679    return o

Combine all observables in list_of_obs into one new observable

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

1682def cov_Obs(means, cov, name, grad=None):
1683    """Create an Obs based on mean(s) and a covariance matrix
1684
1685    Parameters
1686    ----------
1687    mean : list of floats or float
1688        N mean value(s) of the new Obs
1689    cov : list or array
1690        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1691    name : str
1692        identifier for the covariance matrix
1693    grad : list or array
1694        Gradient of the Covobs wrt. the means belonging to cov.
1695    """
1696
1697    def covobs_to_obs(co):
1698        """Make an Obs out of a Covobs
1699
1700        Parameters
1701        ----------
1702        co : Covobs
1703            Covobs to be embedded into the Obs
1704        """
1705        o = Obs([], [], means=[])
1706        o._value = co.value
1707        o.names.append(co.name)
1708        o._covobs[co.name] = co
1709        o._dvalue = np.sqrt(co.errsq())
1710        return o
1711
1712    ol = []
1713    if isinstance(means, (float, int)):
1714        means = [means]
1715
1716    for i in range(len(means)):
1717        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1718    if ol[0].covobs[name].N != len(means):
1719        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1720    if len(ol) == 1:
1721        return ol[0]
1722    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

mean (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.