pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
 108                    elif np.any(dc == 0):
 109                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
 110                    if len(dc) == 1:
 111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 112                    else:
 113                        self.idl[name] = list(idx)
 114                else:
 115                    raise TypeError('incompatible type for idl[%s].' % (name))
 116        else:
 117            for name, sample in sorted(zip(names, samples)):
 118                self.idl[name] = range(1, len(sample) + 1)
 119
 120        if kwargs.get("means") is not None:
 121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                self.r_values[name] = mean
 125                self.deltas[name] = sample
 126        else:
 127            for name, sample in sorted(zip(names, samples)):
 128                self.shape[name] = len(self.idl[name])
 129                self.N += self.shape[name]
 130                if len(sample) != self.shape[name]:
 131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 132                self.r_values[name] = np.mean(sample)
 133                self.deltas[name] = sample - self.r_values[name]
 134                self._value += self.shape[name] * self.r_values[name]
 135            self._value /= self.N
 136
 137        self._dvalue = 0.0
 138        self.ddvalue = 0.0
 139        self.reweighted = False
 140
 141        self.tag = None
 142
 143    @property
 144    def value(self):
 145        return self._value
 146
 147    @property
 148    def dvalue(self):
 149        return self._dvalue
 150
 151    @property
 152    def e_names(self):
 153        return sorted(set([o.split('|')[0] for o in self.names]))
 154
 155    @property
 156    def cov_names(self):
 157        return sorted(set([o for o in self.covobs.keys()]))
 158
 159    @property
 160    def mc_names(self):
 161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 162
 163    @property
 164    def e_content(self):
 165        res = {}
 166        for e, e_name in enumerate(self.e_names):
 167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 168            if e_name in self.names:
 169                res[e_name].append(e_name)
 170        return res
 171
 172    @property
 173    def covobs(self):
 174        return self._covobs
 175
 176    def gamma_method(self, **kwargs):
 177        """Estimate the error and related properties of the Obs.
 178
 179        Parameters
 180        ----------
 181        S : float
 182            specifies a custom value for the parameter S (default 2.0).
 183            If set to 0 it is assumed that the data exhibits no
 184            autocorrelation. In this case the error estimates coincides
 185            with the sample standard error.
 186        tau_exp : float
 187            positive value triggers the critical slowing down analysis
 188            (default 0.0).
 189        N_sigma : float
 190            number of standard deviations from zero until the tail is
 191            attached to the autocorrelation function (default 1).
 192        fft : bool
 193            determines whether the fft algorithm is used for the computation
 194            of the autocorrelation function (default True)
 195        """
 196
 197        e_content = self.e_content
 198        self.e_dvalue = {}
 199        self.e_ddvalue = {}
 200        self.e_tauint = {}
 201        self.e_dtauint = {}
 202        self.e_windowsize = {}
 203        self.e_n_tauint = {}
 204        self.e_n_dtauint = {}
 205        e_gamma = {}
 206        self.e_rho = {}
 207        self.e_drho = {}
 208        self._dvalue = 0
 209        self.ddvalue = 0
 210
 211        self.S = {}
 212        self.tau_exp = {}
 213        self.N_sigma = {}
 214
 215        if kwargs.get('fft') is False:
 216            fft = False
 217        else:
 218            fft = True
 219
 220        def _parse_kwarg(kwarg_name):
 221            if kwarg_name in kwargs:
 222                tmp = kwargs.get(kwarg_name)
 223                if isinstance(tmp, (int, float)):
 224                    if tmp < 0:
 225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 226                    for e, e_name in enumerate(self.e_names):
 227                        getattr(self, kwarg_name)[e_name] = tmp
 228                else:
 229                    raise TypeError(kwarg_name + ' is not in proper format.')
 230            else:
 231                for e, e_name in enumerate(self.e_names):
 232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 234                    else:
 235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 236
 237        _parse_kwarg('S')
 238        _parse_kwarg('tau_exp')
 239        _parse_kwarg('N_sigma')
 240
 241        for e, e_name in enumerate(self.mc_names):
 242            gapsize = _determine_gap(self, e_content, e_name)
 243
 244            r_length = []
 245            for r_name in e_content[e_name]:
 246                if isinstance(self.idl[r_name], range):
 247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 248                else:
 249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 250
 251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 252            w_max = max(r_length) // 2
 253            e_gamma[e_name] = np.zeros(w_max)
 254            self.e_rho[e_name] = np.zeros(w_max)
 255            self.e_drho[e_name] = np.zeros(w_max)
 256
 257            for r_name in e_content[e_name]:
 258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 259
 260            gamma_div = np.zeros(w_max)
 261            for r_name in e_content[e_name]:
 262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 263            gamma_div[gamma_div < 1] = 1.0
 264            e_gamma[e_name] /= gamma_div[:w_max]
 265
 266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 267                self.e_tauint[e_name] = 0.5
 268                self.e_dtauint[e_name] = 0.0
 269                self.e_dvalue[e_name] = 0.0
 270                self.e_ddvalue[e_name] = 0.0
 271                self.e_windowsize[e_name] = 0
 272                continue
 273
 274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 276            # Make sure no entry of tauint is smaller than 0.5
 277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 278            # hep-lat/0306017 eq. (42)
 279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 280            self.e_n_dtauint[e_name][0] = 0.0
 281
 282            def _compute_drho(i):
 283                tmp = (self.e_rho[e_name][i + 1:w_max]
 284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 288
 289            if self.tau_exp[e_name] > 0:
 290                _compute_drho(1)
 291                texp = self.tau_exp[e_name]
 292                # Critical slowing down analysis
 293                if w_max // 2 <= 1:
 294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 295                for n in range(1, w_max // 2):
 296                    _compute_drho(n + 1)
 297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 298                        # Bias correction hep-lat/0306017 eq. (49) included
 299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 304                        self.e_windowsize[e_name] = n
 305                        break
 306            else:
 307                if self.S[e_name] == 0.0:
 308                    self.e_tauint[e_name] = 0.5
 309                    self.e_dtauint[e_name] = 0.0
 310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 312                    self.e_windowsize[e_name] = 0
 313                else:
 314                    # Standard automatic windowing procedure
 315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 317                    for n in range(1, w_max):
 318                        if g_w[n - 1] < 0 or n >= w_max - 1:
 319                            _compute_drho(n)
 320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 324                            self.e_windowsize[e_name] = n
 325                            break
 326
 327            self._dvalue += self.e_dvalue[e_name] ** 2
 328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 329
 330        for e_name in self.cov_names:
 331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 332            self.e_ddvalue[e_name] = 0
 333            self._dvalue += self.e_dvalue[e_name]**2
 334
 335        self._dvalue = np.sqrt(self._dvalue)
 336        if self._dvalue == 0.0:
 337            self.ddvalue = 0.0
 338        else:
 339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 340        return
 341
 342    gm = gamma_method
 343
 344    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 345        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 346           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 347
 348        Parameters
 349        ----------
 350        deltas : list
 351            List of fluctuations
 352        idx : list
 353            List or range of configurations on which the deltas are defined.
 354        shape : int
 355            Number of configurations in idx.
 356        w_max : int
 357            Upper bound for the summation window.
 358        fft : bool
 359            determines whether the fft algorithm is used for the computation
 360            of the autocorrelation function.
 361        gapsize : int
 362            The target distance between two configurations. If longer distances
 363            are found in idx, the data is expanded.
 364        """
 365        gamma = np.zeros(w_max)
 366        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 367        new_shape = len(deltas)
 368        if fft:
 369            max_gamma = min(new_shape, w_max)
 370            # The padding for the fft has to be even
 371            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 372            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 373        else:
 374            for n in range(w_max):
 375                if new_shape - n >= 0:
 376                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 377
 378        return gamma
 379
 380    def details(self, ens_content=True):
 381        """Output detailed properties of the Obs.
 382
 383        Parameters
 384        ----------
 385        ens_content : bool
 386            print details about the ensembles and replica if true.
 387        """
 388        if self.tag is not None:
 389            print("Description:", self.tag)
 390        if not hasattr(self, 'e_dvalue'):
 391            print('Result\t %3.8e' % (self.value))
 392        else:
 393            if self.value == 0.0:
 394                percentage = np.nan
 395            else:
 396                percentage = np.abs(self._dvalue / self.value) * 100
 397            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 398            if len(self.e_names) > 1:
 399                print(' Ensemble errors:')
 400            e_content = self.e_content
 401            for e_name in self.mc_names:
 402                gap = _determine_gap(self, e_content, e_name)
 403
 404                if len(self.e_names) > 1:
 405                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 406                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 407                tau_string += f" in units of {gap} config"
 408                if gap > 1:
 409                    tau_string += "s"
 410                if self.tau_exp[e_name] > 0:
 411                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 412                else:
 413                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 414                print(tau_string)
 415            for e_name in self.cov_names:
 416                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 417        if ens_content is True:
 418            if len(self.e_names) == 1:
 419                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 420            else:
 421                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 422            my_string_list = []
 423            for key, value in sorted(self.e_content.items()):
 424                if key not in self.covobs:
 425                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 426                    if len(value) == 1:
 427                        my_string += f': {self.shape[value[0]]} configurations'
 428                        if isinstance(self.idl[value[0]], range):
 429                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 430                        else:
 431                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 432                    else:
 433                        sublist = []
 434                        for v in value:
 435                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 436                            my_substring += f': {self.shape[v]} configurations'
 437                            if isinstance(self.idl[v], range):
 438                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 439                            else:
 440                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 441                            sublist.append(my_substring)
 442
 443                        my_string += '\n' + '\n'.join(sublist)
 444                else:
 445                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 446                my_string_list.append(my_string)
 447            print('\n'.join(my_string_list))
 448
 449    def reweight(self, weight):
 450        """Reweight the obs with given rewighting factors.
 451
 452        Parameters
 453        ----------
 454        weight : Obs
 455            Reweighting factor. An Observable that has to be defined on a superset of the
 456            configurations in obs[i].idl for all i.
 457        all_configs : bool
 458            if True, the reweighted observables are normalized by the average of
 459            the reweighting factor on all configurations in weight.idl and not
 460            on the configurations in obs[i].idl. Default False.
 461        """
 462        return reweight(weight, [self])[0]
 463
 464    def is_zero_within_error(self, sigma=1):
 465        """Checks whether the observable is zero within 'sigma' standard errors.
 466
 467        Parameters
 468        ----------
 469        sigma : int
 470            Number of standard errors used for the check.
 471
 472        Works only properly when the gamma method was run.
 473        """
 474        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 475
 476    def is_zero(self, atol=1e-10):
 477        """Checks whether the observable is zero within a given tolerance.
 478
 479        Parameters
 480        ----------
 481        atol : float
 482            Absolute tolerance (for details see numpy documentation).
 483        """
 484        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 485
 486    def plot_tauint(self, save=None):
 487        """Plot integrated autocorrelation time for each ensemble.
 488
 489        Parameters
 490        ----------
 491        save : str
 492            saves the figure to a file named 'save' if.
 493        """
 494        if not hasattr(self, 'e_dvalue'):
 495            raise Exception('Run the gamma method first.')
 496
 497        for e, e_name in enumerate(self.mc_names):
 498            fig = plt.figure()
 499            plt.xlabel(r'$W$')
 500            plt.ylabel(r'$\tau_\mathrm{int}$')
 501            length = int(len(self.e_n_tauint[e_name]))
 502            if self.tau_exp[e_name] > 0:
 503                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 504                x_help = np.arange(2 * self.tau_exp[e_name])
 505                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 506                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 507                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 508                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 509                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 510                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 511                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 512            else:
 513                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 515
 516            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 517            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 518            plt.legend()
 519            plt.xlim(-0.5, xmax)
 520            ylim = plt.ylim()
 521            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 522            plt.draw()
 523            if save:
 524                fig.savefig(save + "_" + str(e))
 525
 526    def plot_rho(self, save=None):
 527        """Plot normalized autocorrelation function time for each ensemble.
 528
 529        Parameters
 530        ----------
 531        save : str
 532            saves the figure to a file named 'save' if.
 533        """
 534        if not hasattr(self, 'e_dvalue'):
 535            raise Exception('Run the gamma method first.')
 536        for e, e_name in enumerate(self.mc_names):
 537            fig = plt.figure()
 538            plt.xlabel('W')
 539            plt.ylabel('rho')
 540            length = int(len(self.e_drho[e_name]))
 541            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 542            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 543            if self.tau_exp[e_name] > 0:
 544                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 545                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 546                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 547                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 548            else:
 549                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 550                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 551            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 552            plt.xlim(-0.5, xmax)
 553            plt.draw()
 554            if save:
 555                fig.savefig(save + "_" + str(e))
 556
 557    def plot_rep_dist(self):
 558        """Plot replica distribution for each ensemble with more than one replicum."""
 559        if not hasattr(self, 'e_dvalue'):
 560            raise Exception('Run the gamma method first.')
 561        for e, e_name in enumerate(self.mc_names):
 562            if len(self.e_content[e_name]) == 1:
 563                print('No replica distribution for a single replicum (', e_name, ')')
 564                continue
 565            r_length = []
 566            sub_r_mean = 0
 567            for r, r_name in enumerate(self.e_content[e_name]):
 568                r_length.append(len(self.deltas[r_name]))
 569                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 570            e_N = np.sum(r_length)
 571            sub_r_mean /= e_N
 572            arr = np.zeros(len(self.e_content[e_name]))
 573            for r, r_name in enumerate(self.e_content[e_name]):
 574                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 575            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 576            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 577            plt.draw()
 578
 579    def plot_history(self, expand=True):
 580        """Plot derived Monte Carlo history for each ensemble
 581
 582        Parameters
 583        ----------
 584        expand : bool
 585            show expanded history for irregular Monte Carlo chains (default: True).
 586        """
 587        for e, e_name in enumerate(self.mc_names):
 588            plt.figure()
 589            r_length = []
 590            tmp = []
 591            tmp_expanded = []
 592            for r, r_name in enumerate(self.e_content[e_name]):
 593                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 594                if expand:
 595                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 596                    r_length.append(len(tmp_expanded[-1]))
 597                else:
 598                    r_length.append(len(tmp[-1]))
 599            e_N = np.sum(r_length)
 600            x = np.arange(e_N)
 601            y_test = np.concatenate(tmp, axis=0)
 602            if expand:
 603                y = np.concatenate(tmp_expanded, axis=0)
 604            else:
 605                y = y_test
 606            plt.errorbar(x, y, fmt='.', markersize=3)
 607            plt.xlim(-0.5, e_N - 0.5)
 608            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 609            plt.draw()
 610
 611    def plot_piechart(self, save=None):
 612        """Plot piechart which shows the fractional contribution of each
 613        ensemble to the error and returns a dictionary containing the fractions.
 614
 615        Parameters
 616        ----------
 617        save : str
 618            saves the figure to a file named 'save' if.
 619        """
 620        if not hasattr(self, 'e_dvalue'):
 621            raise Exception('Run the gamma method first.')
 622        if np.isclose(0.0, self._dvalue, atol=1e-15):
 623            raise Exception('Error is 0.0')
 624        labels = self.e_names
 625        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 626        fig1, ax1 = plt.subplots()
 627        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 628        ax1.axis('equal')
 629        plt.draw()
 630        if save:
 631            fig1.savefig(save)
 632
 633        return dict(zip(labels, sizes))
 634
 635    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 636        """Dump the Obs to a file 'name' of chosen format.
 637
 638        Parameters
 639        ----------
 640        filename : str
 641            name of the file to be saved.
 642        datatype : str
 643            Format of the exported file. Supported formats include
 644            "json.gz" and "pickle"
 645        description : str
 646            Description for output file, only relevant for json.gz format.
 647        path : str
 648            specifies a custom path for the file (default '.')
 649        """
 650        if 'path' in kwargs:
 651            file_name = kwargs.get('path') + '/' + filename
 652        else:
 653            file_name = filename
 654
 655        if datatype == "json.gz":
 656            from .input.json import dump_to_json
 657            dump_to_json([self], file_name, description=description)
 658        elif datatype == "pickle":
 659            with open(file_name + '.p', 'wb') as fb:
 660                pickle.dump(self, fb)
 661        else:
 662            raise Exception("Unknown datatype " + str(datatype))
 663
 664    def export_jackknife(self):
 665        """Export jackknife samples from the Obs
 666
 667        Returns
 668        -------
 669        numpy.ndarray
 670            Returns a numpy array of length N + 1 where N is the number of samples
 671            for the given ensemble and replicum. The zeroth entry of the array contains
 672            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 673            derived from the Obs. The current implementation only works for observables
 674            defined on exactly one ensemble and replicum. The derived jackknife samples
 675            should agree with samples from a full jackknife analysis up to O(1/N).
 676        """
 677
 678        if len(self.names) != 1:
 679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 680
 681        name = self.names[0]
 682        full_data = self.deltas[name] + self.r_values[name]
 683        n = full_data.size
 684        mean = self.value
 685        tmp_jacks = np.zeros(n + 1)
 686        tmp_jacks[0] = mean
 687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 688        return tmp_jacks
 689
 690    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 691        """Export bootstrap samples from the Obs
 692
 693        Parameters
 694        ----------
 695        samples : int
 696            Number of bootstrap samples to generate.
 697        random_numbers : np.ndarray
 698            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 699            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 700        save_rng : str
 701            Save the random numbers to a file if a path is specified.
 702
 703        Returns
 704        -------
 705        numpy.ndarray
 706            Returns a numpy array of length N + 1 where N is the number of samples
 707            for the given ensemble and replicum. The zeroth entry of the array contains
 708            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 709            derived from the Obs. The current implementation only works for observables
 710            defined on exactly one ensemble and replicum. The derived bootstrap samples
 711            should agree with samples from a full bootstrap analysis up to O(1/N).
 712        """
 713        if len(self.names) != 1:
 714            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 715
 716        name = self.names[0]
 717        length = self.N
 718
 719        if random_numbers is None:
 720            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 721            rng = np.random.default_rng(seed)
 722            random_numbers = rng.integers(0, length, size=(samples, length))
 723
 724        if save_rng is not None:
 725            np.savetxt(save_rng, random_numbers, fmt='%i')
 726
 727        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 728        ret = np.zeros(samples + 1)
 729        ret[0] = self.value
 730        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 731        return ret
 732
 733    def __float__(self):
 734        return float(self.value)
 735
 736    def __repr__(self):
 737        return 'Obs[' + str(self) + ']'
 738
 739    def __str__(self):
 740        return _format_uncertainty(self.value, self._dvalue)
 741
 742    def __format__(self, format_type):
 743        if format_type == "":
 744            significance = 2
 745        else:
 746            significance = int(float(format_type.replace("+", "").replace("-", "")))
 747        my_str = _format_uncertainty(self.value, self._dvalue,
 748                                     significance=significance)
 749        for char in ["+", " "]:
 750            if format_type.startswith(char):
 751                if my_str[0] != "-":
 752                    my_str = char + my_str
 753        return my_str
 754
 755    def __hash__(self):
 756        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 757        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 758        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 759        hash_tuple += tuple([o.encode() for o in self.names])
 760        m = hashlib.md5()
 761        [m.update(o) for o in hash_tuple]
 762        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 763
 764    # Overload comparisons
 765    def __lt__(self, other):
 766        return self.value < other
 767
 768    def __le__(self, other):
 769        return self.value <= other
 770
 771    def __gt__(self, other):
 772        return self.value > other
 773
 774    def __ge__(self, other):
 775        return self.value >= other
 776
 777    def __eq__(self, other):
 778        if other is None:
 779            return False
 780        return (self - other).is_zero()
 781
 782    # Overload math operations
 783    def __add__(self, y):
 784        if isinstance(y, Obs):
 785            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 786        else:
 787            if isinstance(y, np.ndarray):
 788                return np.array([self + o for o in y])
 789            elif isinstance(y, complex):
 790                return CObs(self, 0) + y
 791            elif y.__class__.__name__ in ['Corr', 'CObs']:
 792                return NotImplemented
 793            else:
 794                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 795
 796    def __radd__(self, y):
 797        return self + y
 798
 799    def __mul__(self, y):
 800        if isinstance(y, Obs):
 801            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 802        else:
 803            if isinstance(y, np.ndarray):
 804                return np.array([self * o for o in y])
 805            elif isinstance(y, complex):
 806                return CObs(self * y.real, self * y.imag)
 807            elif y.__class__.__name__ in ['Corr', 'CObs']:
 808                return NotImplemented
 809            else:
 810                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 811
 812    def __rmul__(self, y):
 813        return self * y
 814
 815    def __sub__(self, y):
 816        if isinstance(y, Obs):
 817            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 818        else:
 819            if isinstance(y, np.ndarray):
 820                return np.array([self - o for o in y])
 821            elif y.__class__.__name__ in ['Corr', 'CObs']:
 822                return NotImplemented
 823            else:
 824                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 825
 826    def __rsub__(self, y):
 827        return -1 * (self - y)
 828
 829    def __pos__(self):
 830        return self
 831
 832    def __neg__(self):
 833        return -1 * self
 834
 835    def __truediv__(self, y):
 836        if isinstance(y, Obs):
 837            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 838        else:
 839            if isinstance(y, np.ndarray):
 840                return np.array([self / o for o in y])
 841            elif y.__class__.__name__ in ['Corr', 'CObs']:
 842                return NotImplemented
 843            else:
 844                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 845
 846    def __rtruediv__(self, y):
 847        if isinstance(y, Obs):
 848            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 849        else:
 850            if isinstance(y, np.ndarray):
 851                return np.array([o / self for o in y])
 852            elif y.__class__.__name__ in ['Corr', 'CObs']:
 853                return NotImplemented
 854            else:
 855                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 856
 857    def __pow__(self, y):
 858        if isinstance(y, Obs):
 859            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 860        else:
 861            return derived_observable(lambda x: x[0] ** y, [self])
 862
 863    def __rpow__(self, y):
 864        if isinstance(y, Obs):
 865            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 866        else:
 867            return derived_observable(lambda x: y ** x[0], [self])
 868
 869    def __abs__(self):
 870        return derived_observable(lambda x: anp.abs(x[0]), [self])
 871
 872    # Overload numpy functions
 873    def sqrt(self):
 874        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 875
 876    def log(self):
 877        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 878
 879    def exp(self):
 880        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 881
 882    def sin(self):
 883        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 884
 885    def cos(self):
 886        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 887
 888    def tan(self):
 889        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 890
 891    def arcsin(self):
 892        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 893
 894    def arccos(self):
 895        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 896
 897    def arctan(self):
 898        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 899
 900    def sinh(self):
 901        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 902
 903    def cosh(self):
 904        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 905
 906    def tanh(self):
 907        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 908
 909    def arcsinh(self):
 910        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 911
 912    def arccosh(self):
 913        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 914
 915    def arctanh(self):
 916        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 917
 918
 919class CObs:
 920    """Class for a complex valued observable."""
 921    __slots__ = ['_real', '_imag', 'tag']
 922
 923    def __init__(self, real, imag=0.0):
 924        self._real = real
 925        self._imag = imag
 926        self.tag = None
 927
 928    @property
 929    def real(self):
 930        return self._real
 931
 932    @property
 933    def imag(self):
 934        return self._imag
 935
 936    def gamma_method(self, **kwargs):
 937        """Executes the gamma_method for the real and the imaginary part."""
 938        if isinstance(self.real, Obs):
 939            self.real.gamma_method(**kwargs)
 940        if isinstance(self.imag, Obs):
 941            self.imag.gamma_method(**kwargs)
 942
 943    def is_zero(self):
 944        """Checks whether both real and imaginary part are zero within machine precision."""
 945        return self.real == 0.0 and self.imag == 0.0
 946
 947    def conjugate(self):
 948        return CObs(self.real, -self.imag)
 949
 950    def __add__(self, other):
 951        if isinstance(other, np.ndarray):
 952            return other + self
 953        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 954            return CObs(self.real + other.real,
 955                        self.imag + other.imag)
 956        else:
 957            return CObs(self.real + other, self.imag)
 958
 959    def __radd__(self, y):
 960        return self + y
 961
 962    def __sub__(self, other):
 963        if isinstance(other, np.ndarray):
 964            return -1 * (other - self)
 965        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 966            return CObs(self.real - other.real, self.imag - other.imag)
 967        else:
 968            return CObs(self.real - other, self.imag)
 969
 970    def __rsub__(self, other):
 971        return -1 * (self - other)
 972
 973    def __mul__(self, other):
 974        if isinstance(other, np.ndarray):
 975            return other * self
 976        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 977            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 978                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 981                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 982                                               [self.real, other.real, self.imag, other.imag],
 983                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 984            elif getattr(other, 'imag', 0) != 0:
 985                return CObs(self.real * other.real - self.imag * other.imag,
 986                            self.imag * other.real + self.real * other.imag)
 987            else:
 988                return CObs(self.real * other.real, self.imag * other.real)
 989        else:
 990            return CObs(self.real * other, self.imag * other)
 991
 992    def __rmul__(self, other):
 993        return self * other
 994
 995    def __truediv__(self, other):
 996        if isinstance(other, np.ndarray):
 997            return 1 / (other / self)
 998        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 999            r = other.real ** 2 + other.imag ** 2
1000            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1001        else:
1002            return CObs(self.real / other, self.imag / other)
1003
1004    def __rtruediv__(self, other):
1005        r = self.real ** 2 + self.imag ** 2
1006        if hasattr(other, 'real') and hasattr(other, 'imag'):
1007            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1008        else:
1009            return CObs(self.real * other / r, -self.imag * other / r)
1010
1011    def __abs__(self):
1012        return np.sqrt(self.real**2 + self.imag**2)
1013
1014    def __pos__(self):
1015        return self
1016
1017    def __neg__(self):
1018        return -1 * self
1019
1020    def __eq__(self, other):
1021        return self.real == other.real and self.imag == other.imag
1022
1023    def __str__(self):
1024        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1025
1026    def __repr__(self):
1027        return 'CObs[' + str(self) + ']'
1028
1029    def __format__(self, format_type):
1030        if format_type == "":
1031            significance = 2
1032            format_type = "2"
1033        else:
1034            significance = int(float(format_type.replace("+", "").replace("-", "")))
1035        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1036
1037
1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1044
1045
1046gm = gamma_method
1047
1048
1049def _format_uncertainty(value, dvalue, significance=2):
1050    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1051    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1052        return str(value)
1053    if not isinstance(significance, int):
1054        raise TypeError("significance needs to be an integer.")
1055    if significance < 1:
1056        raise ValueError("significance needs to be larger than zero.")
1057    fexp = np.floor(np.log10(dvalue))
1058    if fexp < 0.0:
1059        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1060    elif fexp == 0.0:
1061        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1062    else:
1063        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1064
1065
1066def _expand_deltas(deltas, idx, shape, gapsize):
1067    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1068       configurations and where holes are filled by 0.
1069       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1070
1071    Parameters
1072    ----------
1073    deltas : list
1074        List of fluctuations
1075    idx : list
1076        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1077    shape : int
1078        Number of configs in idx.
1079    gapsize : int
1080        The target distance between two configurations. If longer distances
1081        are found in idx, the data is expanded.
1082    """
1083    if isinstance(idx, range):
1084        if (idx.step == gapsize):
1085            return deltas
1086    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1087    for i in range(shape):
1088        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1089    return ret
1090
1091
1092def _merge_idx(idl):
1093    """Returns the union of all lists in idl as range or sorted list
1094
1095    Parameters
1096    ----------
1097    idl : list
1098        List of lists or ranges.
1099    """
1100
1101    if _check_lists_equal(idl):
1102        return idl[0]
1103
1104    idunion = sorted(set().union(*idl))
1105
1106    # Check whether idunion can be expressed as range
1107    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1108    idtest = [list(idrange), idunion]
1109    if _check_lists_equal(idtest):
1110        return idrange
1111
1112    return idunion
1113
1114
1115def _intersection_idx(idl):
1116    """Returns the intersection of all lists in idl as range or sorted list
1117
1118    Parameters
1119    ----------
1120    idl : list
1121        List of lists or ranges.
1122    """
1123
1124    if _check_lists_equal(idl):
1125        return idl[0]
1126
1127    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1128
1129    # Check whether idinter can be expressed as range
1130    try:
1131        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1132        idtest = [list(idrange), idinter]
1133        if _check_lists_equal(idtest):
1134            return idrange
1135    except IndexError:
1136        pass
1137
1138    return idinter
1139
1140
1141def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1142    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1143       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1144       common divisor of the step sizes is used as new step size.
1145
1146    Parameters
1147    ----------
1148    deltas : list
1149        List of fluctuations
1150    idx : list
1151        List or range of configs on which the deltas are defined.
1152        Has to be a subset of new_idx and has to be sorted in ascending order.
1153    shape : list
1154        Number of configs in idx.
1155    new_idx : list
1156        List of configs that defines the new range, has to be sorted in ascending order.
1157    """
1158
1159    if type(idx) is range and type(new_idx) is range:
1160        if idx == new_idx:
1161            return deltas
1162    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1163    for i in range(shape):
1164        ret[idx[i] - new_idx[0]] = deltas[i]
1165    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1166
1167
1168def derived_observable(func, data, array_mode=False, **kwargs):
1169    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1170
1171    Parameters
1172    ----------
1173    func : object
1174        arbitrary function of the form func(data, **kwargs). For the
1175        automatic differentiation to work, all numpy functions have to have
1176        the autograd wrapper (use 'import autograd.numpy as anp').
1177    data : list
1178        list of Obs, e.g. [obs1, obs2, obs3].
1179    num_grad : bool
1180        if True, numerical derivatives are used instead of autograd
1181        (default False). To control the numerical differentiation the
1182        kwargs of numdifftools.step_generators.MaxStepGenerator
1183        can be used.
1184    man_grad : list
1185        manually supply a list or an array which contains the jacobian
1186        of func. Use cautiously, supplying the wrong derivative will
1187        not be intercepted.
1188
1189    Notes
1190    -----
1191    For simple mathematical operations it can be practical to use anonymous
1192    functions. For the ratio of two observables one can e.g. use
1193
1194    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1195    """
1196
1197    data = np.asarray(data)
1198    raveled_data = data.ravel()
1199
1200    # Workaround for matrix operations containing non Obs data
1201    if not all(isinstance(x, Obs) for x in raveled_data):
1202        for i in range(len(raveled_data)):
1203            if isinstance(raveled_data[i], (int, float)):
1204                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1205
1206    allcov = {}
1207    for o in raveled_data:
1208        for name in o.cov_names:
1209            if name in allcov:
1210                if not np.allclose(allcov[name], o.covobs[name].cov):
1211                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1212            else:
1213                allcov[name] = o.covobs[name].cov
1214
1215    n_obs = len(raveled_data)
1216    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1217    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1218    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1219
1220    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1221
1222    if data.ndim == 1:
1223        values = np.array([o.value for o in data])
1224    else:
1225        values = np.vectorize(lambda x: x.value)(data)
1226
1227    new_values = func(values, **kwargs)
1228
1229    multi = int(isinstance(new_values, np.ndarray))
1230
1231    new_r_values = {}
1232    new_idl_d = {}
1233    for name in new_sample_names:
1234        idl = []
1235        tmp_values = np.zeros(n_obs)
1236        for i, item in enumerate(raveled_data):
1237            tmp_values[i] = item.r_values.get(name, item.value)
1238            tmp_idl = item.idl.get(name)
1239            if tmp_idl is not None:
1240                idl.append(tmp_idl)
1241        if multi > 0:
1242            tmp_values = np.array(tmp_values).reshape(data.shape)
1243        new_r_values[name] = func(tmp_values, **kwargs)
1244        new_idl_d[name] = _merge_idx(idl)
1245
1246    if 'man_grad' in kwargs:
1247        deriv = np.asarray(kwargs.get('man_grad'))
1248        if new_values.shape + data.shape != deriv.shape:
1249            raise Exception('Manual derivative does not have correct shape.')
1250    elif kwargs.get('num_grad') is True:
1251        if multi > 0:
1252            raise Exception('Multi mode currently not supported for numerical derivative')
1253        options = {
1254            'base_step': 0.1,
1255            'step_ratio': 2.5}
1256        for key in options.keys():
1257            kwarg = kwargs.get(key)
1258            if kwarg is not None:
1259                options[key] = kwarg
1260        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1261        if tmp_df.size == 1:
1262            deriv = np.array([tmp_df.real])
1263        else:
1264            deriv = tmp_df.real
1265    else:
1266        deriv = jacobian(func)(values, **kwargs)
1267
1268    final_result = np.zeros(new_values.shape, dtype=object)
1269
1270    if array_mode is True:
1271
1272        class _Zero_grad():
1273            def __init__(self, N):
1274                self.grad = np.zeros((N, 1))
1275
1276        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1277        d_extracted = {}
1278        g_extracted = {}
1279        for name in new_sample_names:
1280            d_extracted[name] = []
1281            ens_length = len(new_idl_d[name])
1282            for i_dat, dat in enumerate(data):
1283                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1284        for name in new_cov_names:
1285            g_extracted[name] = []
1286            zero_grad = _Zero_grad(new_covobs_lengths[name])
1287            for i_dat, dat in enumerate(data):
1288                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1289
1290    for i_val, new_val in np.ndenumerate(new_values):
1291        new_deltas = {}
1292        new_grad = {}
1293        if array_mode is True:
1294            for name in new_sample_names:
1295                ens_length = d_extracted[name][0].shape[-1]
1296                new_deltas[name] = np.zeros(ens_length)
1297                for i_dat, dat in enumerate(d_extracted[name]):
1298                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1299            for name in new_cov_names:
1300                new_grad[name] = 0
1301                for i_dat, dat in enumerate(g_extracted[name]):
1302                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1303        else:
1304            for j_obs, obs in np.ndenumerate(data):
1305                for name in obs.names:
1306                    if name in obs.cov_names:
1307                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1308                    else:
1309                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1310
1311        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1312
1313        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1314            raise Exception('The same name has been used for deltas and covobs!')
1315        new_samples = []
1316        new_means = []
1317        new_idl = []
1318        new_names_obs = []
1319        for name in new_names:
1320            if name not in new_covobs:
1321                new_samples.append(new_deltas[name])
1322                new_idl.append(new_idl_d[name])
1323                new_means.append(new_r_values[name][i_val])
1324                new_names_obs.append(name)
1325        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1326        for name in new_covobs:
1327            final_result[i_val].names.append(name)
1328        final_result[i_val]._covobs = new_covobs
1329        final_result[i_val]._value = new_val
1330        final_result[i_val].reweighted = reweighted
1331
1332    if multi == 0:
1333        final_result = final_result.item()
1334
1335    return final_result
1336
1337
1338def _reduce_deltas(deltas, idx_old, idx_new):
1339    """Extract deltas defined on idx_old on all configs of idx_new.
1340
1341    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1342    are ordered in an ascending order.
1343
1344    Parameters
1345    ----------
1346    deltas : list
1347        List of fluctuations
1348    idx_old : list
1349        List or range of configs on which the deltas are defined
1350    idx_new : list
1351        List of configs for which we want to extract the deltas.
1352        Has to be a subset of idx_old.
1353    """
1354    if not len(deltas) == len(idx_old):
1355        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1356    if type(idx_old) is range and type(idx_new) is range:
1357        if idx_old == idx_new:
1358            return deltas
1359    if _check_lists_equal([idx_old, idx_new]):
1360        return deltas
1361    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1362    if len(indices) < len(idx_new):
1363        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1364    return np.array(deltas)[indices]
1365
1366
1367def reweight(weight, obs, **kwargs):
1368    """Reweight a list of observables.
1369
1370    Parameters
1371    ----------
1372    weight : Obs
1373        Reweighting factor. An Observable that has to be defined on a superset of the
1374        configurations in obs[i].idl for all i.
1375    obs : list
1376        list of Obs, e.g. [obs1, obs2, obs3].
1377    all_configs : bool
1378        if True, the reweighted observables are normalized by the average of
1379        the reweighting factor on all configurations in weight.idl and not
1380        on the configurations in obs[i].idl. Default False.
1381    """
1382    result = []
1383    for i in range(len(obs)):
1384        if len(obs[i].cov_names):
1385            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1386        if not set(obs[i].names).issubset(weight.names):
1387            raise Exception('Error: Ensembles do not fit')
1388        for name in obs[i].names:
1389            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1390                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1391        new_samples = []
1392        w_deltas = {}
1393        for name in sorted(obs[i].names):
1394            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1395            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1396        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1397
1398        if kwargs.get('all_configs'):
1399            new_weight = weight
1400        else:
1401            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1402
1403        result.append(tmp_obs / new_weight)
1404        result[-1].reweighted = True
1405
1406    return result
1407
1408
1409def correlate(obs_a, obs_b):
1410    """Correlate two observables.
1411
1412    Parameters
1413    ----------
1414    obs_a : Obs
1415        First observable
1416    obs_b : Obs
1417        Second observable
1418
1419    Notes
1420    -----
1421    Keep in mind to only correlate primary observables which have not been reweighted
1422    yet. The reweighting has to be applied after correlating the observables.
1423    Currently only works if ensembles are identical (this is not strictly necessary).
1424    """
1425
1426    if sorted(obs_a.names) != sorted(obs_b.names):
1427        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1428    if len(obs_a.cov_names) or len(obs_b.cov_names):
1429        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1430    for name in obs_a.names:
1431        if obs_a.shape[name] != obs_b.shape[name]:
1432            raise Exception('Shapes of ensemble', name, 'do not fit')
1433        if obs_a.idl[name] != obs_b.idl[name]:
1434            raise Exception('idl of ensemble', name, 'do not fit')
1435
1436    if obs_a.reweighted is True:
1437        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1438    if obs_b.reweighted is True:
1439        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1440
1441    new_samples = []
1442    new_idl = []
1443    for name in sorted(obs_a.names):
1444        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1445        new_idl.append(obs_a.idl[name])
1446
1447    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1448    o.reweighted = obs_a.reweighted or obs_b.reweighted
1449    return o
1450
1451
1452def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1453    r'''Calculates the error covariance matrix of a set of observables.
1454
1455    WARNING: This function should be used with care, especially for observables with support on multiple
1456             ensembles with differing autocorrelations. See the notes below for details.
1457
1458    The gamma method has to be applied first to all observables.
1459
1460    Parameters
1461    ----------
1462    obs : list or numpy.ndarray
1463        List or one dimensional array of Obs
1464    visualize : bool
1465        If True plots the corresponding normalized correlation matrix (default False).
1466    correlation : bool
1467        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1468    smooth : None or int
1469        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1470        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1471        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1472        small ones.
1473
1474    Notes
1475    -----
1476    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1477    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1478    in the absence of autocorrelation.
1479    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1480    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1481    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1482    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1483    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1484    '''
1485
1486    length = len(obs)
1487
1488    max_samples = np.max([o.N for o in obs])
1489    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1490        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1491
1492    cov = np.zeros((length, length))
1493    for i in range(length):
1494        for j in range(i, length):
1495            cov[i, j] = _covariance_element(obs[i], obs[j])
1496    cov = cov + cov.T - np.diag(np.diag(cov))
1497
1498    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1499
1500    if isinstance(smooth, int):
1501        corr = _smooth_eigenvalues(corr, smooth)
1502
1503    if visualize:
1504        plt.matshow(corr, vmin=-1, vmax=1)
1505        plt.set_cmap('RdBu')
1506        plt.colorbar()
1507        plt.draw()
1508
1509    if correlation is True:
1510        return corr
1511
1512    errors = [o.dvalue for o in obs]
1513    cov = np.diag(errors) @ corr @ np.diag(errors)
1514
1515    eigenvalues = np.linalg.eigh(cov)[0]
1516    if not np.all(eigenvalues >= 0):
1517        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1518
1519    return cov
1520
1521
1522def _smooth_eigenvalues(corr, E):
1523    """Eigenvalue smoothing as described in hep-lat/9412087
1524
1525    corr : np.ndarray
1526        correlation matrix
1527    E : integer
1528        Number of eigenvalues to be left substantially unchanged
1529    """
1530    if not (2 < E < corr.shape[0] - 1):
1531        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1532    vals, vec = np.linalg.eigh(corr)
1533    lambda_min = np.mean(vals[:-E])
1534    vals[vals < lambda_min] = lambda_min
1535    vals /= np.mean(vals)
1536    return vec @ np.diag(vals) @ vec.T
1537
1538
1539def _covariance_element(obs1, obs2):
1540    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1541
1542    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1543        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1544        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1545        return np.sum(deltas1 * deltas2)
1546
1547    if set(obs1.names).isdisjoint(set(obs2.names)):
1548        return 0.0
1549
1550    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1551        raise Exception('The gamma method has to be applied to both Obs first.')
1552
1553    dvalue = 0.0
1554
1555    for e_name in obs1.mc_names:
1556
1557        if e_name not in obs2.mc_names:
1558            continue
1559
1560        idl_d = {}
1561        for r_name in obs1.e_content[e_name]:
1562            if r_name not in obs2.e_content[e_name]:
1563                continue
1564            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1565
1566        gamma = 0.0
1567
1568        for r_name in obs1.e_content[e_name]:
1569            if r_name not in obs2.e_content[e_name]:
1570                continue
1571            if len(idl_d[r_name]) == 0:
1572                continue
1573            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1574
1575        if gamma == 0.0:
1576            continue
1577
1578        gamma_div = 0.0
1579        for r_name in obs1.e_content[e_name]:
1580            if r_name not in obs2.e_content[e_name]:
1581                continue
1582            if len(idl_d[r_name]) == 0:
1583                continue
1584            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1585        gamma /= gamma_div
1586
1587        dvalue += gamma
1588
1589    for e_name in obs1.cov_names:
1590
1591        if e_name not in obs2.cov_names:
1592            continue
1593
1594        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1595
1596    return dvalue
1597
1598
1599def import_jackknife(jacks, name, idl=None):
1600    """Imports jackknife samples and returns an Obs
1601
1602    Parameters
1603    ----------
1604    jacks : numpy.ndarray
1605        numpy array containing the mean value as zeroth entry and
1606        the N jackknife samples as first to Nth entry.
1607    name : str
1608        name of the ensemble the samples are defined on.
1609    """
1610    length = len(jacks) - 1
1611    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1612    samples = jacks[1:] @ prj
1613    mean = np.mean(samples)
1614    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1615    new_obs._value = jacks[0]
1616    return new_obs
1617
1618
1619def import_bootstrap(boots, name, random_numbers):
1620    """Imports bootstrap samples and returns an Obs
1621
1622    Parameters
1623    ----------
1624    boots : numpy.ndarray
1625        numpy array containing the mean value as zeroth entry and
1626        the N bootstrap samples as first to Nth entry.
1627    name : str
1628        name of the ensemble the samples are defined on.
1629    random_numbers : np.ndarray
1630        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1631        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1632        chain to be reconstructed.
1633    """
1634    samples, length = random_numbers.shape
1635    if samples != len(boots) - 1:
1636        raise ValueError("Random numbers do not have the correct shape.")
1637
1638    if samples < length:
1639        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1640
1641    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1642
1643    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1644    ret = Obs([samples], [name])
1645    ret._value = boots[0]
1646    return ret
1647
1648
1649def merge_obs(list_of_obs):
1650    """Combine all observables in list_of_obs into one new observable
1651
1652    Parameters
1653    ----------
1654    list_of_obs : list
1655        list of the Obs object to be combined
1656
1657    Notes
1658    -----
1659    It is not possible to combine obs which are based on the same replicum
1660    """
1661    replist = [item for obs in list_of_obs for item in obs.names]
1662    if (len(replist) == len(set(replist))) is False:
1663        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1664    if any([len(o.cov_names) for o in list_of_obs]):
1665        raise Exception('Not possible to merge data that contains covobs!')
1666    new_dict = {}
1667    idl_dict = {}
1668    for o in list_of_obs:
1669        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1670                        for key in set(o.deltas) | set(o.r_values)})
1671        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1672
1673    names = sorted(new_dict.keys())
1674    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1675    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1676    return o
1677
1678
1679def cov_Obs(means, cov, name, grad=None):
1680    """Create an Obs based on mean(s) and a covariance matrix
1681
1682    Parameters
1683    ----------
1684    mean : list of floats or float
1685        N mean value(s) of the new Obs
1686    cov : list or array
1687        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1688    name : str
1689        identifier for the covariance matrix
1690    grad : list or array
1691        Gradient of the Covobs wrt. the means belonging to cov.
1692    """
1693
1694    def covobs_to_obs(co):
1695        """Make an Obs out of a Covobs
1696
1697        Parameters
1698        ----------
1699        co : Covobs
1700            Covobs to be embedded into the Obs
1701        """
1702        o = Obs([], [], means=[])
1703        o._value = co.value
1704        o.names.append(co.name)
1705        o._covobs[co.name] = co
1706        o._dvalue = np.sqrt(co.errsq())
1707        return o
1708
1709    ol = []
1710    if isinstance(means, (float, int)):
1711        means = [means]
1712
1713    for i in range(len(means)):
1714        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1715    if ol[0].covobs[name].N != len(means):
1716        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1717    if len(ol) == 1:
1718        return ol[0]
1719    return ol
1720
1721
1722def _determine_gap(o, e_content, e_name):
1723    gaps = []
1724    for r_name in e_content[e_name]:
1725        if isinstance(o.idl[r_name], range):
1726            gaps.append(o.idl[r_name].step)
1727        else:
1728            gaps.append(np.min(np.diff(o.idl[r_name])))
1729
1730    gap = min(gaps)
1731    if not np.all([gi % gap == 0 for gi in gaps]):
1732        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1733
1734    return gap
1735
1736
1737def _check_lists_equal(idl):
1738    '''
1739    Use groupby to efficiently check whether all elements of idl are identical.
1740    Returns True if all elements are equal, otherwise False.
1741
1742    Parameters
1743    ----------
1744    idl : list of lists, ranges or np.ndarrays
1745    '''
1746    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1747    if next(g, True) and not next(g, False):
1748        return True
1749    return False

class CObs: View Source

 920class CObs:
 921    """Class for a complex valued observable."""
 922    __slots__ = ['_real', '_imag', 'tag']
 923
 924    def __init__(self, real, imag=0.0):
 925        self._real = real
 926        self._imag = imag
 927        self.tag = None
 928
 929    @property
 930    def real(self):
 931        return self._real
 932
 933    @property
 934    def imag(self):
 935        return self._imag
 936
 937    def gamma_method(self, **kwargs):
 938        """Executes the gamma_method for the real and the imaginary part."""
 939        if isinstance(self.real, Obs):
 940            self.real.gamma_method(**kwargs)
 941        if isinstance(self.imag, Obs):
 942            self.imag.gamma_method(**kwargs)
 943
 944    def is_zero(self):
 945        """Checks whether both real and imaginary part are zero within machine precision."""
 946        return self.real == 0.0 and self.imag == 0.0
 947
 948    def conjugate(self):
 949        return CObs(self.real, -self.imag)
 950
 951    def __add__(self, other):
 952        if isinstance(other, np.ndarray):
 953            return other + self
 954        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 955            return CObs(self.real + other.real,
 956                        self.imag + other.imag)
 957        else:
 958            return CObs(self.real + other, self.imag)
 959
 960    def __radd__(self, y):
 961        return self + y
 962
 963    def __sub__(self, other):
 964        if isinstance(other, np.ndarray):
 965            return -1 * (other - self)
 966        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 967            return CObs(self.real - other.real, self.imag - other.imag)
 968        else:
 969            return CObs(self.real - other, self.imag)
 970
 971    def __rsub__(self, other):
 972        return -1 * (self - other)
 973
 974    def __mul__(self, other):
 975        if isinstance(other, np.ndarray):
 976            return other * self
 977        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 978            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 979                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 980                                               [self.real, other.real, self.imag, other.imag],
 981                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 982                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 983                                               [self.real, other.real, self.imag, other.imag],
 984                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 985            elif getattr(other, 'imag', 0) != 0:
 986                return CObs(self.real * other.real - self.imag * other.imag,
 987                            self.imag * other.real + self.real * other.imag)
 988            else:
 989                return CObs(self.real * other.real, self.imag * other.real)
 990        else:
 991            return CObs(self.real * other, self.imag * other)
 992
 993    def __rmul__(self, other):
 994        return self * other
 995
 996    def __truediv__(self, other):
 997        if isinstance(other, np.ndarray):
 998            return 1 / (other / self)
 999        elif hasattr(other, 'real') and hasattr(other, 'imag'):
1000            r = other.real ** 2 + other.imag ** 2
1001            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1002        else:
1003            return CObs(self.real / other, self.imag / other)
1004
1005    def __rtruediv__(self, other):
1006        r = self.real ** 2 + self.imag ** 2
1007        if hasattr(other, 'real') and hasattr(other, 'imag'):
1008            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1009        else:
1010            return CObs(self.real * other / r, -self.imag * other / r)
1011
1012    def __abs__(self):
1013        return np.sqrt(self.real**2 + self.imag**2)
1014
1015    def __pos__(self):
1016        return self
1017
1018    def __neg__(self):
1019        return -1 * self
1020
1021    def __eq__(self, other):
1022        return self.real == other.real and self.imag == other.imag
1023
1024    def __str__(self):
1025        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1026
1027    def __repr__(self):
1028        return 'CObs[' + str(self) + ']'
1029
1030    def __format__(self, format_type):
1031        if format_type == "":
1032            significance = 2
1033            format_type = "2"
1034        else:
1035            significance = int(float(format_type.replace("+", "").replace("-", "")))
1036        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

924    def __init__(self, real, imag=0.0):
925        self._real = real
926        self._imag = imag
927        self.tag = None

tag

real

imag

def gamma_method(self, **kwargs): View Source

937    def gamma_method(self, **kwargs):
938        """Executes the gamma_method for the real and the imaginary part."""
939        if isinstance(self.real, Obs):
940            self.real.gamma_method(**kwargs)
941        if isinstance(self.imag, Obs):
942            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

944    def is_zero(self):
945        """Checks whether both real and imaginary part are zero within machine precision."""
946        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

948    def conjugate(self):
949        return CObs(self.real, -self.imag)

def gamma_method(x, **kwargs): View Source

1039def gamma_method(x, **kwargs):
1040    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1041
1042    See docstring of pe.Obs.gamma_method for details.
1043    """
1044    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs): View Source

1039def gamma_method(x, **kwargs):
1040    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1041
1042    See docstring of pe.Obs.gamma_method for details.
1043    """
1044    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1169def derived_observable(func, data, array_mode=False, **kwargs):
1170    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1171
1172    Parameters
1173    ----------
1174    func : object
1175        arbitrary function of the form func(data, **kwargs). For the
1176        automatic differentiation to work, all numpy functions have to have
1177        the autograd wrapper (use 'import autograd.numpy as anp').
1178    data : list
1179        list of Obs, e.g. [obs1, obs2, obs3].
1180    num_grad : bool
1181        if True, numerical derivatives are used instead of autograd
1182        (default False). To control the numerical differentiation the
1183        kwargs of numdifftools.step_generators.MaxStepGenerator
1184        can be used.
1185    man_grad : list
1186        manually supply a list or an array which contains the jacobian
1187        of func. Use cautiously, supplying the wrong derivative will
1188        not be intercepted.
1189
1190    Notes
1191    -----
1192    For simple mathematical operations it can be practical to use anonymous
1193    functions. For the ratio of two observables one can e.g. use
1194
1195    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1196    """
1197
1198    data = np.asarray(data)
1199    raveled_data = data.ravel()
1200
1201    # Workaround for matrix operations containing non Obs data
1202    if not all(isinstance(x, Obs) for x in raveled_data):
1203        for i in range(len(raveled_data)):
1204            if isinstance(raveled_data[i], (int, float)):
1205                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1206
1207    allcov = {}
1208    for o in raveled_data:
1209        for name in o.cov_names:
1210            if name in allcov:
1211                if not np.allclose(allcov[name], o.covobs[name].cov):
1212                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1213            else:
1214                allcov[name] = o.covobs[name].cov
1215
1216    n_obs = len(raveled_data)
1217    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1218    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1219    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1220
1221    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1222
1223    if data.ndim == 1:
1224        values = np.array([o.value for o in data])
1225    else:
1226        values = np.vectorize(lambda x: x.value)(data)
1227
1228    new_values = func(values, **kwargs)
1229
1230    multi = int(isinstance(new_values, np.ndarray))
1231
1232    new_r_values = {}
1233    new_idl_d = {}
1234    for name in new_sample_names:
1235        idl = []
1236        tmp_values = np.zeros(n_obs)
1237        for i, item in enumerate(raveled_data):
1238            tmp_values[i] = item.r_values.get(name, item.value)
1239            tmp_idl = item.idl.get(name)
1240            if tmp_idl is not None:
1241                idl.append(tmp_idl)
1242        if multi > 0:
1243            tmp_values = np.array(tmp_values).reshape(data.shape)
1244        new_r_values[name] = func(tmp_values, **kwargs)
1245        new_idl_d[name] = _merge_idx(idl)
1246
1247    if 'man_grad' in kwargs:
1248        deriv = np.asarray(kwargs.get('man_grad'))
1249        if new_values.shape + data.shape != deriv.shape:
1250            raise Exception('Manual derivative does not have correct shape.')
1251    elif kwargs.get('num_grad') is True:
1252        if multi > 0:
1253            raise Exception('Multi mode currently not supported for numerical derivative')
1254        options = {
1255            'base_step': 0.1,
1256            'step_ratio': 2.5}
1257        for key in options.keys():
1258            kwarg = kwargs.get(key)
1259            if kwarg is not None:
1260                options[key] = kwarg
1261        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1262        if tmp_df.size == 1:
1263            deriv = np.array([tmp_df.real])
1264        else:
1265            deriv = tmp_df.real
1266    else:
1267        deriv = jacobian(func)(values, **kwargs)
1268
1269    final_result = np.zeros(new_values.shape, dtype=object)
1270
1271    if array_mode is True:
1272
1273        class _Zero_grad():
1274            def __init__(self, N):
1275                self.grad = np.zeros((N, 1))
1276
1277        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1278        d_extracted = {}
1279        g_extracted = {}
1280        for name in new_sample_names:
1281            d_extracted[name] = []
1282            ens_length = len(new_idl_d[name])
1283            for i_dat, dat in enumerate(data):
1284                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1285        for name in new_cov_names:
1286            g_extracted[name] = []
1287            zero_grad = _Zero_grad(new_covobs_lengths[name])
1288            for i_dat, dat in enumerate(data):
1289                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1290
1291    for i_val, new_val in np.ndenumerate(new_values):
1292        new_deltas = {}
1293        new_grad = {}
1294        if array_mode is True:
1295            for name in new_sample_names:
1296                ens_length = d_extracted[name][0].shape[-1]
1297                new_deltas[name] = np.zeros(ens_length)
1298                for i_dat, dat in enumerate(d_extracted[name]):
1299                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1300            for name in new_cov_names:
1301                new_grad[name] = 0
1302                for i_dat, dat in enumerate(g_extracted[name]):
1303                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1304        else:
1305            for j_obs, obs in np.ndenumerate(data):
1306                for name in obs.names:
1307                    if name in obs.cov_names:
1308                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1309                    else:
1310                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1311
1312        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1313
1314        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1315            raise Exception('The same name has been used for deltas and covobs!')
1316        new_samples = []
1317        new_means = []
1318        new_idl = []
1319        new_names_obs = []
1320        for name in new_names:
1321            if name not in new_covobs:
1322                new_samples.append(new_deltas[name])
1323                new_idl.append(new_idl_d[name])
1324                new_means.append(new_r_values[name][i_val])
1325                new_names_obs.append(name)
1326        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1327        for name in new_covobs:
1328            final_result[i_val].names.append(name)
1329        final_result[i_val]._covobs = new_covobs
1330        final_result[i_val]._value = new_val
1331        final_result[i_val].reweighted = reweighted
1332
1333    if multi == 0:
1334        final_result = final_result.item()
1335
1336    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

1368def reweight(weight, obs, **kwargs):
1369    """Reweight a list of observables.
1370
1371    Parameters
1372    ----------
1373    weight : Obs
1374        Reweighting factor. An Observable that has to be defined on a superset of the
1375        configurations in obs[i].idl for all i.
1376    obs : list
1377        list of Obs, e.g. [obs1, obs2, obs3].
1378    all_configs : bool
1379        if True, the reweighted observables are normalized by the average of
1380        the reweighting factor on all configurations in weight.idl and not
1381        on the configurations in obs[i].idl. Default False.
1382    """
1383    result = []
1384    for i in range(len(obs)):
1385        if len(obs[i].cov_names):
1386            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1387        if not set(obs[i].names).issubset(weight.names):
1388            raise Exception('Error: Ensembles do not fit')
1389        for name in obs[i].names:
1390            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1391                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1392        new_samples = []
1393        w_deltas = {}
1394        for name in sorted(obs[i].names):
1395            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1396            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1397        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1398
1399        if kwargs.get('all_configs'):
1400            new_weight = weight
1401        else:
1402            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1403
1404        result.append(tmp_obs / new_weight)
1405        result[-1].reweighted = True
1406
1407    return result

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

1410def correlate(obs_a, obs_b):
1411    """Correlate two observables.
1412
1413    Parameters
1414    ----------
1415    obs_a : Obs
1416        First observable
1417    obs_b : Obs
1418        Second observable
1419
1420    Notes
1421    -----
1422    Keep in mind to only correlate primary observables which have not been reweighted
1423    yet. The reweighting has to be applied after correlating the observables.
1424    Currently only works if ensembles are identical (this is not strictly necessary).
1425    """
1426
1427    if sorted(obs_a.names) != sorted(obs_b.names):
1428        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1429    if len(obs_a.cov_names) or len(obs_b.cov_names):
1430        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1431    for name in obs_a.names:
1432        if obs_a.shape[name] != obs_b.shape[name]:
1433            raise Exception('Shapes of ensemble', name, 'do not fit')
1434        if obs_a.idl[name] != obs_b.idl[name]:
1435            raise Exception('idl of ensemble', name, 'do not fit')
1436
1437    if obs_a.reweighted is True:
1438        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1439    if obs_b.reweighted is True:
1440        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1441
1442    new_samples = []
1443    new_idl = []
1444    for name in sorted(obs_a.names):
1445        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1446        new_idl.append(obs_a.idl[name])
1447
1448    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1449    o.reweighted = obs_a.reweighted or obs_b.reweighted
1450    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

1453def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1454    r'''Calculates the error covariance matrix of a set of observables.
1455
1456    WARNING: This function should be used with care, especially for observables with support on multiple
1457             ensembles with differing autocorrelations. See the notes below for details.
1458
1459    The gamma method has to be applied first to all observables.
1460
1461    Parameters
1462    ----------
1463    obs : list or numpy.ndarray
1464        List or one dimensional array of Obs
1465    visualize : bool
1466        If True plots the corresponding normalized correlation matrix (default False).
1467    correlation : bool
1468        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1469    smooth : None or int
1470        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1471        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1472        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1473        small ones.
1474
1475    Notes
1476    -----
1477    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1478    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1479    in the absence of autocorrelation.
1480    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1481    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1482    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1483    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1484    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1485    '''
1486
1487    length = len(obs)
1488
1489    max_samples = np.max([o.N for o in obs])
1490    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1491        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1492
1493    cov = np.zeros((length, length))
1494    for i in range(length):
1495        for j in range(i, length):
1496            cov[i, j] = _covariance_element(obs[i], obs[j])
1497    cov = cov + cov.T - np.diag(np.diag(cov))
1498
1499    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1500
1501    if isinstance(smooth, int):
1502        corr = _smooth_eigenvalues(corr, smooth)
1503
1504    if visualize:
1505        plt.matshow(corr, vmin=-1, vmax=1)
1506        plt.set_cmap('RdBu')
1507        plt.colorbar()
1508        plt.draw()
1509
1510    if correlation is True:
1511        return corr
1512
1513    errors = [o.dvalue for o in obs]
1514    cov = np.diag(errors) @ corr @ np.diag(errors)
1515
1516    eigenvalues = np.linalg.eigh(cov)[0]
1517    if not np.all(eigenvalues >= 0):
1518        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1519
1520    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None): View Source

1600def import_jackknife(jacks, name, idl=None):
1601    """Imports jackknife samples and returns an Obs
1602
1603    Parameters
1604    ----------
1605    jacks : numpy.ndarray
1606        numpy array containing the mean value as zeroth entry and
1607        the N jackknife samples as first to Nth entry.
1608    name : str
1609        name of the ensemble the samples are defined on.
1610    """
1611    length = len(jacks) - 1
1612    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1613    samples = jacks[1:] @ prj
1614    mean = np.mean(samples)
1615    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1616    new_obs._value = jacks[0]
1617    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.

def import_bootstrap(boots, name, random_numbers): View Source

1620def import_bootstrap(boots, name, random_numbers):
1621    """Imports bootstrap samples and returns an Obs
1622
1623    Parameters
1624    ----------
1625    boots : numpy.ndarray
1626        numpy array containing the mean value as zeroth entry and
1627        the N bootstrap samples as first to Nth entry.
1628    name : str
1629        name of the ensemble the samples are defined on.
1630    random_numbers : np.ndarray
1631        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1632        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1633        chain to be reconstructed.
1634    """
1635    samples, length = random_numbers.shape
1636    if samples != len(boots) - 1:
1637        raise ValueError("Random numbers do not have the correct shape.")
1638
1639    if samples < length:
1640        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1641
1642    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1643
1644    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1645    ret = Obs([samples], [name])
1646    ret._value = boots[0]
1647    return ret

Imports bootstrap samples and returns an Obs

Parameters

boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.
random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.

def merge_obs(list_of_obs): View Source

1650def merge_obs(list_of_obs):
1651    """Combine all observables in list_of_obs into one new observable
1652
1653    Parameters
1654    ----------
1655    list_of_obs : list
1656        list of the Obs object to be combined
1657
1658    Notes
1659    -----
1660    It is not possible to combine obs which are based on the same replicum
1661    """
1662    replist = [item for obs in list_of_obs for item in obs.names]
1663    if (len(replist) == len(set(replist))) is False:
1664        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1665    if any([len(o.cov_names) for o in list_of_obs]):
1666        raise Exception('Not possible to merge data that contains covobs!')
1667    new_dict = {}
1668    idl_dict = {}
1669    for o in list_of_obs:
1670        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1671                        for key in set(o.deltas) | set(o.r_values)})
1672        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1673
1674    names = sorted(new_dict.keys())
1675    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1676    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1677    return o

Combine all observables in list_of_obs into one new observable

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

1680def cov_Obs(means, cov, name, grad=None):
1681    """Create an Obs based on mean(s) and a covariance matrix
1682
1683    Parameters
1684    ----------
1685    mean : list of floats or float
1686        N mean value(s) of the new Obs
1687    cov : list or array
1688        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1689    name : str
1690        identifier for the covariance matrix
1691    grad : list or array
1692        Gradient of the Covobs wrt. the means belonging to cov.
1693    """
1694
1695    def covobs_to_obs(co):
1696        """Make an Obs out of a Covobs
1697
1698        Parameters
1699        ----------
1700        co : Covobs
1701            Covobs to be embedded into the Obs
1702        """
1703        o = Obs([], [], means=[])
1704        o._value = co.value
1705        o.names.append(co.name)
1706        o._covobs[co.name] = co
1707        o._dvalue = np.sqrt(co.errsq())
1708        return o
1709
1710    ol = []
1711    if isinstance(means, (float, int)):
1712        means = [means]
1713
1714    for i in range(len(means)):
1715        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1716    if ol[0].covobs[name].N != len(means):
1717        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1718    if len(ol) == 1:
1719        return ol[0]
1720    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

mean (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.