pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85                if len(set([o.split('|')[0] for o in names])) > 1:
  86                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
  87            else:
  88                if not isinstance(names[0], str):
  89                    raise TypeError('All names have to be strings.')
  90            if min(len(x) for x in samples) <= 4:
  91                raise ValueError('Samples have to have at least 5 entries.')
  92
  93        self.names = sorted(names)
  94        self.shape = {}
  95        self.r_values = {}
  96        self.deltas = {}
  97        self._covobs = {}
  98
  99        self._value = 0
 100        self.N = 0
 101        self.idl = {}
 102        if idl is not None:
 103            for name, idx in sorted(zip(names, idl)):
 104                if isinstance(idx, range):
 105                    self.idl[name] = idx
 106                elif isinstance(idx, (list, np.ndarray)):
 107                    dc = np.unique(np.diff(idx))
 108                    if np.any(dc < 0):
 109                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
 110                    elif np.any(dc == 0):
 111                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
 112                    if len(dc) == 1:
 113                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 114                    else:
 115                        self.idl[name] = list(idx)
 116                else:
 117                    raise TypeError('incompatible type for idl[%s].' % (name))
 118        else:
 119            for name, sample in sorted(zip(names, samples)):
 120                self.idl[name] = range(1, len(sample) + 1)
 121
 122        if kwargs.get("means") is not None:
 123            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 124                self.shape[name] = len(self.idl[name])
 125                self.N += self.shape[name]
 126                self.r_values[name] = mean
 127                self.deltas[name] = sample
 128        else:
 129            for name, sample in sorted(zip(names, samples)):
 130                self.shape[name] = len(self.idl[name])
 131                self.N += self.shape[name]
 132                if len(sample) != self.shape[name]:
 133                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 134                self.r_values[name] = np.mean(sample)
 135                self.deltas[name] = sample - self.r_values[name]
 136                self._value += self.shape[name] * self.r_values[name]
 137            self._value /= self.N
 138
 139        self._dvalue = 0.0
 140        self.ddvalue = 0.0
 141        self.reweighted = False
 142
 143        self.tag = None
 144
 145    @property
 146    def value(self):
 147        return self._value
 148
 149    @property
 150    def dvalue(self):
 151        return self._dvalue
 152
 153    @property
 154    def e_names(self):
 155        return sorted(set([o.split('|')[0] for o in self.names]))
 156
 157    @property
 158    def cov_names(self):
 159        return sorted(set([o for o in self.covobs.keys()]))
 160
 161    @property
 162    def mc_names(self):
 163        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 164
 165    @property
 166    def e_content(self):
 167        res = {}
 168        for e, e_name in enumerate(self.e_names):
 169            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 170            if e_name in self.names:
 171                res[e_name].append(e_name)
 172        return res
 173
 174    @property
 175    def covobs(self):
 176        return self._covobs
 177
 178    def gamma_method(self, **kwargs):
 179        """Estimate the error and related properties of the Obs.
 180
 181        Parameters
 182        ----------
 183        S : float
 184            specifies a custom value for the parameter S (default 2.0).
 185            If set to 0 it is assumed that the data exhibits no
 186            autocorrelation. In this case the error estimates coincides
 187            with the sample standard error.
 188        tau_exp : float
 189            positive value triggers the critical slowing down analysis
 190            (default 0.0).
 191        N_sigma : float
 192            number of standard deviations from zero until the tail is
 193            attached to the autocorrelation function (default 1).
 194        fft : bool
 195            determines whether the fft algorithm is used for the computation
 196            of the autocorrelation function (default True)
 197        """
 198
 199        e_content = self.e_content
 200        self.e_dvalue = {}
 201        self.e_ddvalue = {}
 202        self.e_tauint = {}
 203        self.e_dtauint = {}
 204        self.e_windowsize = {}
 205        self.e_n_tauint = {}
 206        self.e_n_dtauint = {}
 207        e_gamma = {}
 208        self.e_rho = {}
 209        self.e_drho = {}
 210        self._dvalue = 0
 211        self.ddvalue = 0
 212
 213        self.S = {}
 214        self.tau_exp = {}
 215        self.N_sigma = {}
 216
 217        if kwargs.get('fft') is False:
 218            fft = False
 219        else:
 220            fft = True
 221
 222        def _parse_kwarg(kwarg_name):
 223            if kwarg_name in kwargs:
 224                tmp = kwargs.get(kwarg_name)
 225                if isinstance(tmp, (int, float)):
 226                    if tmp < 0:
 227                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
 228                    for e, e_name in enumerate(self.e_names):
 229                        getattr(self, kwarg_name)[e_name] = tmp
 230                else:
 231                    raise TypeError(kwarg_name + ' is not in proper format.')
 232            else:
 233                for e, e_name in enumerate(self.e_names):
 234                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 236                    else:
 237                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 238
 239        _parse_kwarg('S')
 240        _parse_kwarg('tau_exp')
 241        _parse_kwarg('N_sigma')
 242
 243        for e, e_name in enumerate(self.mc_names):
 244            gapsize = _determine_gap(self, e_content, e_name)
 245
 246            r_length = []
 247            for r_name in e_content[e_name]:
 248                if isinstance(self.idl[r_name], range):
 249                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 250                else:
 251                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 252
 253            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 254            w_max = max(r_length) // 2
 255            e_gamma[e_name] = np.zeros(w_max)
 256            self.e_rho[e_name] = np.zeros(w_max)
 257            self.e_drho[e_name] = np.zeros(w_max)
 258
 259            for r_name in e_content[e_name]:
 260                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 261
 262            gamma_div = np.zeros(w_max)
 263            for r_name in e_content[e_name]:
 264                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 265            gamma_div[gamma_div < 1] = 1.0
 266            e_gamma[e_name] /= gamma_div[:w_max]
 267
 268            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 269                self.e_tauint[e_name] = 0.5
 270                self.e_dtauint[e_name] = 0.0
 271                self.e_dvalue[e_name] = 0.0
 272                self.e_ddvalue[e_name] = 0.0
 273                self.e_windowsize[e_name] = 0
 274                continue
 275
 276            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 277            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 278            # Make sure no entry of tauint is smaller than 0.5
 279            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 280            # hep-lat/0306017 eq. (42)
 281            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 282            self.e_n_dtauint[e_name][0] = 0.0
 283
 284            def _compute_drho(i):
 285                tmp = (self.e_rho[e_name][i + 1:w_max]
 286                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 287                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 288                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 289                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 290
 291            if self.tau_exp[e_name] > 0:
 292                _compute_drho(1)
 293                texp = self.tau_exp[e_name]
 294                # Critical slowing down analysis
 295                if w_max // 2 <= 1:
 296                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
 297                for n in range(1, w_max // 2):
 298                    _compute_drho(n + 1)
 299                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 300                        # Bias correction hep-lat/0306017 eq. (49) included
 301                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 302                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 303                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 304                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 305                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 306                        self.e_windowsize[e_name] = n
 307                        break
 308            else:
 309                if self.S[e_name] == 0.0:
 310                    self.e_tauint[e_name] = 0.5
 311                    self.e_dtauint[e_name] = 0.0
 312                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 313                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 314                    self.e_windowsize[e_name] = 0
 315                else:
 316                    # Standard automatic windowing procedure
 317                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 318                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 319                    for n in range(1, w_max):
 320                        if g_w[n - 1] < 0 or n >= w_max - 1:
 321                            _compute_drho(n)
 322                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 323                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 324                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 325                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 326                            self.e_windowsize[e_name] = n
 327                            break
 328
 329            self._dvalue += self.e_dvalue[e_name] ** 2
 330            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 331
 332        for e_name in self.cov_names:
 333            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 334            self.e_ddvalue[e_name] = 0
 335            self._dvalue += self.e_dvalue[e_name]**2
 336
 337        self._dvalue = np.sqrt(self._dvalue)
 338        if self._dvalue == 0.0:
 339            self.ddvalue = 0.0
 340        else:
 341            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 342        return
 343
 344    gm = gamma_method
 345
 346    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 347        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 348           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 349
 350        Parameters
 351        ----------
 352        deltas : list
 353            List of fluctuations
 354        idx : list
 355            List or range of configurations on which the deltas are defined.
 356        shape : int
 357            Number of configurations in idx.
 358        w_max : int
 359            Upper bound for the summation window.
 360        fft : bool
 361            determines whether the fft algorithm is used for the computation
 362            of the autocorrelation function.
 363        gapsize : int
 364            The target distance between two configurations. If longer distances
 365            are found in idx, the data is expanded.
 366        """
 367        gamma = np.zeros(w_max)
 368        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 369        new_shape = len(deltas)
 370        if fft:
 371            max_gamma = min(new_shape, w_max)
 372            # The padding for the fft has to be even
 373            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 374            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 375        else:
 376            for n in range(w_max):
 377                if new_shape - n >= 0:
 378                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 379
 380        return gamma
 381
 382    def details(self, ens_content=True):
 383        """Output detailed properties of the Obs.
 384
 385        Parameters
 386        ----------
 387        ens_content : bool
 388            print details about the ensembles and replica if true.
 389        """
 390        if self.tag is not None:
 391            print("Description:", self.tag)
 392        if not hasattr(self, 'e_dvalue'):
 393            print('Result\t %3.8e' % (self.value))
 394        else:
 395            if self.value == 0.0:
 396                percentage = np.nan
 397            else:
 398                percentage = np.abs(self._dvalue / self.value) * 100
 399            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 400            if len(self.e_names) > 1:
 401                print(' Ensemble errors:')
 402            e_content = self.e_content
 403            for e_name in self.mc_names:
 404                gap = _determine_gap(self, e_content, e_name)
 405
 406                if len(self.e_names) > 1:
 407                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 408                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 409                tau_string += f" in units of {gap} config"
 410                if gap > 1:
 411                    tau_string += "s"
 412                if self.tau_exp[e_name] > 0:
 413                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 414                else:
 415                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 416                print(tau_string)
 417            for e_name in self.cov_names:
 418                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 419        if ens_content is True:
 420            if len(self.e_names) == 1:
 421                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 422            else:
 423                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 424            my_string_list = []
 425            for key, value in sorted(self.e_content.items()):
 426                if key not in self.covobs:
 427                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 428                    if len(value) == 1:
 429                        my_string += f': {self.shape[value[0]]} configurations'
 430                        if isinstance(self.idl[value[0]], range):
 431                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 432                        else:
 433                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 434                    else:
 435                        sublist = []
 436                        for v in value:
 437                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 438                            my_substring += f': {self.shape[v]} configurations'
 439                            if isinstance(self.idl[v], range):
 440                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 441                            else:
 442                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 443                            sublist.append(my_substring)
 444
 445                        my_string += '\n' + '\n'.join(sublist)
 446                else:
 447                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 448                my_string_list.append(my_string)
 449            print('\n'.join(my_string_list))
 450
 451    def reweight(self, weight):
 452        """Reweight the obs with given rewighting factors.
 453
 454        Parameters
 455        ----------
 456        weight : Obs
 457            Reweighting factor. An Observable that has to be defined on a superset of the
 458            configurations in obs[i].idl for all i.
 459        all_configs : bool
 460            if True, the reweighted observables are normalized by the average of
 461            the reweighting factor on all configurations in weight.idl and not
 462            on the configurations in obs[i].idl. Default False.
 463        """
 464        return reweight(weight, [self])[0]
 465
 466    def is_zero_within_error(self, sigma=1):
 467        """Checks whether the observable is zero within 'sigma' standard errors.
 468
 469        Parameters
 470        ----------
 471        sigma : int
 472            Number of standard errors used for the check.
 473
 474        Works only properly when the gamma method was run.
 475        """
 476        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 477
 478    def is_zero(self, atol=1e-10):
 479        """Checks whether the observable is zero within a given tolerance.
 480
 481        Parameters
 482        ----------
 483        atol : float
 484            Absolute tolerance (for details see numpy documentation).
 485        """
 486        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 487
 488    def plot_tauint(self, save=None):
 489        """Plot integrated autocorrelation time for each ensemble.
 490
 491        Parameters
 492        ----------
 493        save : str
 494            saves the figure to a file named 'save' if.
 495        """
 496        if not hasattr(self, 'e_dvalue'):
 497            raise Exception('Run the gamma method first.')
 498
 499        for e, e_name in enumerate(self.mc_names):
 500            fig = plt.figure()
 501            plt.xlabel(r'$W$')
 502            plt.ylabel(r'$\tau_\mathrm{int}$')
 503            length = int(len(self.e_n_tauint[e_name]))
 504            if self.tau_exp[e_name] > 0:
 505                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 506                x_help = np.arange(2 * self.tau_exp[e_name])
 507                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 508                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 509                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 510                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 511                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 512                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 513                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 514            else:
 515                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 516                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 517
 518            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 519            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 520            plt.legend()
 521            plt.xlim(-0.5, xmax)
 522            ylim = plt.ylim()
 523            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 524            plt.draw()
 525            if save:
 526                fig.savefig(save + "_" + str(e))
 527
 528    def plot_rho(self, save=None):
 529        """Plot normalized autocorrelation function time for each ensemble.
 530
 531        Parameters
 532        ----------
 533        save : str
 534            saves the figure to a file named 'save' if.
 535        """
 536        if not hasattr(self, 'e_dvalue'):
 537            raise Exception('Run the gamma method first.')
 538        for e, e_name in enumerate(self.mc_names):
 539            fig = plt.figure()
 540            plt.xlabel('W')
 541            plt.ylabel('rho')
 542            length = int(len(self.e_drho[e_name]))
 543            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 544            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 545            if self.tau_exp[e_name] > 0:
 546                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 547                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 548                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 549                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 550            else:
 551                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 552                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 553            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 554            plt.xlim(-0.5, xmax)
 555            plt.draw()
 556            if save:
 557                fig.savefig(save + "_" + str(e))
 558
 559    def plot_rep_dist(self):
 560        """Plot replica distribution for each ensemble with more than one replicum."""
 561        if not hasattr(self, 'e_dvalue'):
 562            raise Exception('Run the gamma method first.')
 563        for e, e_name in enumerate(self.mc_names):
 564            if len(self.e_content[e_name]) == 1:
 565                print('No replica distribution for a single replicum (', e_name, ')')
 566                continue
 567            r_length = []
 568            sub_r_mean = 0
 569            for r, r_name in enumerate(self.e_content[e_name]):
 570                r_length.append(len(self.deltas[r_name]))
 571                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 572            e_N = np.sum(r_length)
 573            sub_r_mean /= e_N
 574            arr = np.zeros(len(self.e_content[e_name]))
 575            for r, r_name in enumerate(self.e_content[e_name]):
 576                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 577            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 578            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 579            plt.draw()
 580
 581    def plot_history(self, expand=True):
 582        """Plot derived Monte Carlo history for each ensemble
 583
 584        Parameters
 585        ----------
 586        expand : bool
 587            show expanded history for irregular Monte Carlo chains (default: True).
 588        """
 589        for e, e_name in enumerate(self.mc_names):
 590            plt.figure()
 591            r_length = []
 592            tmp = []
 593            tmp_expanded = []
 594            for r, r_name in enumerate(self.e_content[e_name]):
 595                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 596                if expand:
 597                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 598                    r_length.append(len(tmp_expanded[-1]))
 599                else:
 600                    r_length.append(len(tmp[-1]))
 601            e_N = np.sum(r_length)
 602            x = np.arange(e_N)
 603            y_test = np.concatenate(tmp, axis=0)
 604            if expand:
 605                y = np.concatenate(tmp_expanded, axis=0)
 606            else:
 607                y = y_test
 608            plt.errorbar(x, y, fmt='.', markersize=3)
 609            plt.xlim(-0.5, e_N - 0.5)
 610            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 611            plt.draw()
 612
 613    def plot_piechart(self, save=None):
 614        """Plot piechart which shows the fractional contribution of each
 615        ensemble to the error and returns a dictionary containing the fractions.
 616
 617        Parameters
 618        ----------
 619        save : str
 620            saves the figure to a file named 'save' if.
 621        """
 622        if not hasattr(self, 'e_dvalue'):
 623            raise Exception('Run the gamma method first.')
 624        if np.isclose(0.0, self._dvalue, atol=1e-15):
 625            raise ValueError('Error is 0.0')
 626        labels = self.e_names
 627        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 628        fig1, ax1 = plt.subplots()
 629        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 630        ax1.axis('equal')
 631        plt.draw()
 632        if save:
 633            fig1.savefig(save)
 634
 635        return dict(zip(labels, sizes))
 636
 637    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 638        """Dump the Obs to a file 'name' of chosen format.
 639
 640        Parameters
 641        ----------
 642        filename : str
 643            name of the file to be saved.
 644        datatype : str
 645            Format of the exported file. Supported formats include
 646            "json.gz" and "pickle"
 647        description : str
 648            Description for output file, only relevant for json.gz format.
 649        path : str
 650            specifies a custom path for the file (default '.')
 651        """
 652        if 'path' in kwargs:
 653            file_name = kwargs.get('path') + '/' + filename
 654        else:
 655            file_name = filename
 656
 657        if datatype == "json.gz":
 658            from .input.json import dump_to_json
 659            dump_to_json([self], file_name, description=description)
 660        elif datatype == "pickle":
 661            with open(file_name + '.p', 'wb') as fb:
 662                pickle.dump(self, fb)
 663        else:
 664            raise TypeError("Unknown datatype " + str(datatype))
 665
 666    def export_jackknife(self):
 667        """Export jackknife samples from the Obs
 668
 669        Returns
 670        -------
 671        numpy.ndarray
 672            Returns a numpy array of length N + 1 where N is the number of samples
 673            for the given ensemble and replicum. The zeroth entry of the array contains
 674            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 675            derived from the Obs. The current implementation only works for observables
 676            defined on exactly one ensemble and replicum. The derived jackknife samples
 677            should agree with samples from a full jackknife analysis up to O(1/N).
 678        """
 679
 680        if len(self.names) != 1:
 681            raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 682
 683        name = self.names[0]
 684        full_data = self.deltas[name] + self.r_values[name]
 685        n = full_data.size
 686        mean = self.value
 687        tmp_jacks = np.zeros(n + 1)
 688        tmp_jacks[0] = mean
 689        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 690        return tmp_jacks
 691
 692    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 693        """Export bootstrap samples from the Obs
 694
 695        Parameters
 696        ----------
 697        samples : int
 698            Number of bootstrap samples to generate.
 699        random_numbers : np.ndarray
 700            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 701            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 702        save_rng : str
 703            Save the random numbers to a file if a path is specified.
 704
 705        Returns
 706        -------
 707        numpy.ndarray
 708            Returns a numpy array of length N + 1 where N is the number of samples
 709            for the given ensemble and replicum. The zeroth entry of the array contains
 710            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 711            derived from the Obs. The current implementation only works for observables
 712            defined on exactly one ensemble and replicum. The derived bootstrap samples
 713            should agree with samples from a full bootstrap analysis up to O(1/N).
 714        """
 715        if len(self.names) != 1:
 716            raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 717
 718        name = self.names[0]
 719        length = self.N
 720
 721        if random_numbers is None:
 722            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 723            rng = np.random.default_rng(seed)
 724            random_numbers = rng.integers(0, length, size=(samples, length))
 725
 726        if save_rng is not None:
 727            np.savetxt(save_rng, random_numbers, fmt='%i')
 728
 729        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 730        ret = np.zeros(samples + 1)
 731        ret[0] = self.value
 732        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 733        return ret
 734
 735    def __float__(self):
 736        return float(self.value)
 737
 738    def __repr__(self):
 739        return 'Obs[' + str(self) + ']'
 740
 741    def __str__(self):
 742        return _format_uncertainty(self.value, self._dvalue)
 743
 744    def __format__(self, format_type):
 745        if format_type == "":
 746            significance = 2
 747        else:
 748            significance = int(float(format_type.replace("+", "").replace("-", "")))
 749        my_str = _format_uncertainty(self.value, self._dvalue,
 750                                     significance=significance)
 751        for char in ["+", " "]:
 752            if format_type.startswith(char):
 753                if my_str[0] != "-":
 754                    my_str = char + my_str
 755        return my_str
 756
 757    def __hash__(self):
 758        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 759        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 760        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 761        hash_tuple += tuple([o.encode() for o in self.names])
 762        m = hashlib.md5()
 763        [m.update(o) for o in hash_tuple]
 764        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 765
 766    # Overload comparisons
 767    def __lt__(self, other):
 768        return self.value < other
 769
 770    def __le__(self, other):
 771        return self.value <= other
 772
 773    def __gt__(self, other):
 774        return self.value > other
 775
 776    def __ge__(self, other):
 777        return self.value >= other
 778
 779    def __eq__(self, other):
 780        if other is None:
 781            return False
 782        return (self - other).is_zero()
 783
 784    # Overload math operations
 785    def __add__(self, y):
 786        if isinstance(y, Obs):
 787            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 788        else:
 789            if isinstance(y, np.ndarray):
 790                return np.array([self + o for o in y])
 791            elif isinstance(y, complex):
 792                return CObs(self, 0) + y
 793            elif y.__class__.__name__ in ['Corr', 'CObs']:
 794                return NotImplemented
 795            else:
 796                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 797
 798    def __radd__(self, y):
 799        return self + y
 800
 801    def __mul__(self, y):
 802        if isinstance(y, Obs):
 803            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 804        else:
 805            if isinstance(y, np.ndarray):
 806                return np.array([self * o for o in y])
 807            elif isinstance(y, complex):
 808                return CObs(self * y.real, self * y.imag)
 809            elif y.__class__.__name__ in ['Corr', 'CObs']:
 810                return NotImplemented
 811            else:
 812                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 813
 814    def __rmul__(self, y):
 815        return self * y
 816
 817    def __sub__(self, y):
 818        if isinstance(y, Obs):
 819            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 820        else:
 821            if isinstance(y, np.ndarray):
 822                return np.array([self - o for o in y])
 823            elif y.__class__.__name__ in ['Corr', 'CObs']:
 824                return NotImplemented
 825            else:
 826                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 827
 828    def __rsub__(self, y):
 829        return -1 * (self - y)
 830
 831    def __pos__(self):
 832        return self
 833
 834    def __neg__(self):
 835        return -1 * self
 836
 837    def __truediv__(self, y):
 838        if isinstance(y, Obs):
 839            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 840        else:
 841            if isinstance(y, np.ndarray):
 842                return np.array([self / o for o in y])
 843            elif y.__class__.__name__ in ['Corr', 'CObs']:
 844                return NotImplemented
 845            else:
 846                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 847
 848    def __rtruediv__(self, y):
 849        if isinstance(y, Obs):
 850            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 851        else:
 852            if isinstance(y, np.ndarray):
 853                return np.array([o / self for o in y])
 854            elif y.__class__.__name__ in ['Corr', 'CObs']:
 855                return NotImplemented
 856            else:
 857                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 858
 859    def __pow__(self, y):
 860        if isinstance(y, Obs):
 861            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
 862        else:
 863            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])
 864
 865    def __rpow__(self, y):
 866        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])
 867
 868    def __abs__(self):
 869        return derived_observable(lambda x: anp.abs(x[0]), [self])
 870
 871    # Overload numpy functions
 872    def sqrt(self):
 873        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 874
 875    def log(self):
 876        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 877
 878    def exp(self):
 879        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 880
 881    def sin(self):
 882        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 883
 884    def cos(self):
 885        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 886
 887    def tan(self):
 888        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 889
 890    def arcsin(self):
 891        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 892
 893    def arccos(self):
 894        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 895
 896    def arctan(self):
 897        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 898
 899    def sinh(self):
 900        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 901
 902    def cosh(self):
 903        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 904
 905    def tanh(self):
 906        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 907
 908    def arcsinh(self):
 909        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 910
 911    def arccosh(self):
 912        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 913
 914    def arctanh(self):
 915        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 916
 917
 918class CObs:
 919    """Class for a complex valued observable."""
 920    __slots__ = ['_real', '_imag', 'tag']
 921
 922    def __init__(self, real, imag=0.0):
 923        self._real = real
 924        self._imag = imag
 925        self.tag = None
 926
 927    @property
 928    def real(self):
 929        return self._real
 930
 931    @property
 932    def imag(self):
 933        return self._imag
 934
 935    def gamma_method(self, **kwargs):
 936        """Executes the gamma_method for the real and the imaginary part."""
 937        if isinstance(self.real, Obs):
 938            self.real.gamma_method(**kwargs)
 939        if isinstance(self.imag, Obs):
 940            self.imag.gamma_method(**kwargs)
 941
 942    def is_zero(self):
 943        """Checks whether both real and imaginary part are zero within machine precision."""
 944        return self.real == 0.0 and self.imag == 0.0
 945
 946    def conjugate(self):
 947        return CObs(self.real, -self.imag)
 948
 949    def __add__(self, other):
 950        if isinstance(other, np.ndarray):
 951            return other + self
 952        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 953            return CObs(self.real + other.real,
 954                        self.imag + other.imag)
 955        else:
 956            return CObs(self.real + other, self.imag)
 957
 958    def __radd__(self, y):
 959        return self + y
 960
 961    def __sub__(self, other):
 962        if isinstance(other, np.ndarray):
 963            return -1 * (other - self)
 964        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 965            return CObs(self.real - other.real, self.imag - other.imag)
 966        else:
 967            return CObs(self.real - other, self.imag)
 968
 969    def __rsub__(self, other):
 970        return -1 * (self - other)
 971
 972    def __mul__(self, other):
 973        if isinstance(other, np.ndarray):
 974            return other * self
 975        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 976            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 977                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 978                                               [self.real, other.real, self.imag, other.imag],
 979                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 980                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 981                                               [self.real, other.real, self.imag, other.imag],
 982                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 983            elif getattr(other, 'imag', 0) != 0:
 984                return CObs(self.real * other.real - self.imag * other.imag,
 985                            self.imag * other.real + self.real * other.imag)
 986            else:
 987                return CObs(self.real * other.real, self.imag * other.real)
 988        else:
 989            return CObs(self.real * other, self.imag * other)
 990
 991    def __rmul__(self, other):
 992        return self * other
 993
 994    def __truediv__(self, other):
 995        if isinstance(other, np.ndarray):
 996            return 1 / (other / self)
 997        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 998            r = other.real ** 2 + other.imag ** 2
 999            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1000        else:
1001            return CObs(self.real / other, self.imag / other)
1002
1003    def __rtruediv__(self, other):
1004        r = self.real ** 2 + self.imag ** 2
1005        if hasattr(other, 'real') and hasattr(other, 'imag'):
1006            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1007        else:
1008            return CObs(self.real * other / r, -self.imag * other / r)
1009
1010    def __abs__(self):
1011        return np.sqrt(self.real**2 + self.imag**2)
1012
1013    def __pos__(self):
1014        return self
1015
1016    def __neg__(self):
1017        return -1 * self
1018
1019    def __eq__(self, other):
1020        return self.real == other.real and self.imag == other.imag
1021
1022    def __str__(self):
1023        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1024
1025    def __repr__(self):
1026        return 'CObs[' + str(self) + ']'
1027
1028    def __format__(self, format_type):
1029        if format_type == "":
1030            significance = 2
1031            format_type = "2"
1032        else:
1033            significance = int(float(format_type.replace("+", "").replace("-", "")))
1034        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1035
1036
1037def gamma_method(x, **kwargs):
1038    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1039
1040    See docstring of pe.Obs.gamma_method for details.
1041    """
1042    return np.vectorize(lambda o: o.gm(**kwargs))(x)
1043
1044
1045gm = gamma_method
1046
1047
1048def _format_uncertainty(value, dvalue, significance=2):
1049    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1050    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1051        return str(value)
1052    if not isinstance(significance, int):
1053        raise TypeError("significance needs to be an integer.")
1054    if significance < 1:
1055        raise ValueError("significance needs to be larger than zero.")
1056    fexp = np.floor(np.log10(dvalue))
1057    if fexp < 0.0:
1058        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1059    elif fexp == 0.0:
1060        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1061    else:
1062        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1063
1064
1065def _expand_deltas(deltas, idx, shape, gapsize):
1066    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1067       configurations and where holes are filled by 0.
1068       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1069
1070    Parameters
1071    ----------
1072    deltas : list
1073        List of fluctuations
1074    idx : list
1075        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1076    shape : int
1077        Number of configs in idx.
1078    gapsize : int
1079        The target distance between two configurations. If longer distances
1080        are found in idx, the data is expanded.
1081    """
1082    if isinstance(idx, range):
1083        if (idx.step == gapsize):
1084            return deltas
1085    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1086    for i in range(shape):
1087        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1088    return ret
1089
1090
1091def _merge_idx(idl):
1092    """Returns the union of all lists in idl as range or sorted list
1093
1094    Parameters
1095    ----------
1096    idl : list
1097        List of lists or ranges.
1098    """
1099
1100    if _check_lists_equal(idl):
1101        return idl[0]
1102
1103    idunion = sorted(set().union(*idl))
1104
1105    # Check whether idunion can be expressed as range
1106    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1107    idtest = [list(idrange), idunion]
1108    if _check_lists_equal(idtest):
1109        return idrange
1110
1111    return idunion
1112
1113
1114def _intersection_idx(idl):
1115    """Returns the intersection of all lists in idl as range or sorted list
1116
1117    Parameters
1118    ----------
1119    idl : list
1120        List of lists or ranges.
1121    """
1122
1123    if _check_lists_equal(idl):
1124        return idl[0]
1125
1126    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1127
1128    # Check whether idinter can be expressed as range
1129    try:
1130        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1131        idtest = [list(idrange), idinter]
1132        if _check_lists_equal(idtest):
1133            return idrange
1134    except IndexError:
1135        pass
1136
1137    return idinter
1138
1139
1140def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor):
1141    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1142       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1143       common divisor of the step sizes is used as new step size.
1144
1145    Parameters
1146    ----------
1147    deltas : list
1148        List of fluctuations
1149    idx : list
1150        List or range of configs on which the deltas are defined.
1151        Has to be a subset of new_idx and has to be sorted in ascending order.
1152    shape : list
1153        Number of configs in idx.
1154    new_idx : list
1155        List of configs that defines the new range, has to be sorted in ascending order.
1156    scalefactor : float
1157        An additional scaling factor that can be applied to scale the fluctuations,
1158        e.g., when Obs with differing numbers of replica are merged.
1159    """
1160    if type(idx) is range and type(new_idx) is range:
1161        if idx == new_idx:
1162            if scalefactor == 1:
1163                return deltas
1164            else:
1165                return deltas * scalefactor
1166    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1167    for i in range(shape):
1168        ret[idx[i] - new_idx[0]] = deltas[i]
1169    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor
1170
1171
1172def derived_observable(func, data, array_mode=False, **kwargs):
1173    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1174
1175    Parameters
1176    ----------
1177    func : object
1178        arbitrary function of the form func(data, **kwargs). For the
1179        automatic differentiation to work, all numpy functions have to have
1180        the autograd wrapper (use 'import autograd.numpy as anp').
1181    data : list
1182        list of Obs, e.g. [obs1, obs2, obs3].
1183    num_grad : bool
1184        if True, numerical derivatives are used instead of autograd
1185        (default False). To control the numerical differentiation the
1186        kwargs of numdifftools.step_generators.MaxStepGenerator
1187        can be used.
1188    man_grad : list
1189        manually supply a list or an array which contains the jacobian
1190        of func. Use cautiously, supplying the wrong derivative will
1191        not be intercepted.
1192
1193    Notes
1194    -----
1195    For simple mathematical operations it can be practical to use anonymous
1196    functions. For the ratio of two observables one can e.g. use
1197
1198    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1199    """
1200
1201    data = np.asarray(data)
1202    raveled_data = data.ravel()
1203
1204    # Workaround for matrix operations containing non Obs data
1205    if not all(isinstance(x, Obs) for x in raveled_data):
1206        for i in range(len(raveled_data)):
1207            if isinstance(raveled_data[i], (int, float)):
1208                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1209
1210    allcov = {}
1211    for o in raveled_data:
1212        for name in o.cov_names:
1213            if name in allcov:
1214                if not np.allclose(allcov[name], o.covobs[name].cov):
1215                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1216            else:
1217                allcov[name] = o.covobs[name].cov
1218
1219    n_obs = len(raveled_data)
1220    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1221    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1222    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1223
1224    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1225
1226    if data.ndim == 1:
1227        values = np.array([o.value for o in data])
1228    else:
1229        values = np.vectorize(lambda x: x.value)(data)
1230
1231    new_values = func(values, **kwargs)
1232
1233    multi = int(isinstance(new_values, np.ndarray))
1234
1235    new_r_values = {}
1236    new_idl_d = {}
1237    for name in new_sample_names:
1238        idl = []
1239        tmp_values = np.zeros(n_obs)
1240        for i, item in enumerate(raveled_data):
1241            tmp_values[i] = item.r_values.get(name, item.value)
1242            tmp_idl = item.idl.get(name)
1243            if tmp_idl is not None:
1244                idl.append(tmp_idl)
1245        if multi > 0:
1246            tmp_values = np.array(tmp_values).reshape(data.shape)
1247        new_r_values[name] = func(tmp_values, **kwargs)
1248        new_idl_d[name] = _merge_idx(idl)
1249
1250    def _compute_scalefactor_missing_rep(obs):
1251        """
1252        Computes the scale factor that is to be multiplied with the deltas
1253        in the case where Obs with different subsets of replica are merged.
1254        Returns a dictionary with the scale factor for each Monte Carlo name.
1255
1256        Parameters
1257        ----------
1258        obs : Obs
1259            The observable corresponding to the deltas that are to be scaled
1260        """
1261        scalef_d = {}
1262        for mc_name in obs.mc_names:
1263            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1264            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1265            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1266                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1267        return scalef_d
1268
1269    if 'man_grad' in kwargs:
1270        deriv = np.asarray(kwargs.get('man_grad'))
1271        if new_values.shape + data.shape != deriv.shape:
1272            raise ValueError('Manual derivative does not have correct shape.')
1273    elif kwargs.get('num_grad') is True:
1274        if multi > 0:
1275            raise Exception('Multi mode currently not supported for numerical derivative')
1276        options = {
1277            'base_step': 0.1,
1278            'step_ratio': 2.5}
1279        for key in options.keys():
1280            kwarg = kwargs.get(key)
1281            if kwarg is not None:
1282                options[key] = kwarg
1283        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1284        if tmp_df.size == 1:
1285            deriv = np.array([tmp_df.real])
1286        else:
1287            deriv = tmp_df.real
1288    else:
1289        deriv = jacobian(func)(values, **kwargs)
1290
1291    final_result = np.zeros(new_values.shape, dtype=object)
1292
1293    if array_mode is True:
1294
1295        class _Zero_grad():
1296            def __init__(self, N):
1297                self.grad = np.zeros((N, 1))
1298
1299        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1300        d_extracted = {}
1301        g_extracted = {}
1302        for name in new_sample_names:
1303            d_extracted[name] = []
1304            ens_length = len(new_idl_d[name])
1305            for i_dat, dat in enumerate(data):
1306                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1307        for name in new_cov_names:
1308            g_extracted[name] = []
1309            zero_grad = _Zero_grad(new_covobs_lengths[name])
1310            for i_dat, dat in enumerate(data):
1311                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1312
1313    for i_val, new_val in np.ndenumerate(new_values):
1314        new_deltas = {}
1315        new_grad = {}
1316        if array_mode is True:
1317            for name in new_sample_names:
1318                ens_length = d_extracted[name][0].shape[-1]
1319                new_deltas[name] = np.zeros(ens_length)
1320                for i_dat, dat in enumerate(d_extracted[name]):
1321                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1322            for name in new_cov_names:
1323                new_grad[name] = 0
1324                for i_dat, dat in enumerate(g_extracted[name]):
1325                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1326        else:
1327            for j_obs, obs in np.ndenumerate(data):
1328                scalef_d = _compute_scalefactor_missing_rep(obs)
1329                for name in obs.names:
1330                    if name in obs.cov_names:
1331                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1332                    else:
1333                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1334
1335        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1336
1337        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1338            raise ValueError('The same name has been used for deltas and covobs!')
1339        new_samples = []
1340        new_means = []
1341        new_idl = []
1342        new_names_obs = []
1343        for name in new_names:
1344            if name not in new_covobs:
1345                new_samples.append(new_deltas[name])
1346                new_idl.append(new_idl_d[name])
1347                new_means.append(new_r_values[name][i_val])
1348                new_names_obs.append(name)
1349        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1350        for name in new_covobs:
1351            final_result[i_val].names.append(name)
1352        final_result[i_val]._covobs = new_covobs
1353        final_result[i_val]._value = new_val
1354        final_result[i_val].reweighted = reweighted
1355
1356    if multi == 0:
1357        final_result = final_result.item()
1358
1359    return final_result
1360
1361
1362def _reduce_deltas(deltas, idx_old, idx_new):
1363    """Extract deltas defined on idx_old on all configs of idx_new.
1364
1365    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1366    are ordered in an ascending order.
1367
1368    Parameters
1369    ----------
1370    deltas : list
1371        List of fluctuations
1372    idx_old : list
1373        List or range of configs on which the deltas are defined
1374    idx_new : list
1375        List of configs for which we want to extract the deltas.
1376        Has to be a subset of idx_old.
1377    """
1378    if not len(deltas) == len(idx_old):
1379        raise ValueError('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1380    if type(idx_old) is range and type(idx_new) is range:
1381        if idx_old == idx_new:
1382            return deltas
1383    if _check_lists_equal([idx_old, idx_new]):
1384        return deltas
1385    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1386    if len(indices) < len(idx_new):
1387        raise ValueError('Error in _reduce_deltas: Config of idx_new not in idx_old')
1388    return np.array(deltas)[indices]
1389
1390
1391def reweight(weight, obs, **kwargs):
1392    """Reweight a list of observables.
1393
1394    Parameters
1395    ----------
1396    weight : Obs
1397        Reweighting factor. An Observable that has to be defined on a superset of the
1398        configurations in obs[i].idl for all i.
1399    obs : list
1400        list of Obs, e.g. [obs1, obs2, obs3].
1401    all_configs : bool
1402        if True, the reweighted observables are normalized by the average of
1403        the reweighting factor on all configurations in weight.idl and not
1404        on the configurations in obs[i].idl. Default False.
1405    """
1406    result = []
1407    for i in range(len(obs)):
1408        if len(obs[i].cov_names):
1409            raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
1410        if not set(obs[i].names).issubset(weight.names):
1411            raise ValueError('Error: Ensembles do not fit')
1412        if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1413            raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
1414        for name in obs[i].names:
1415            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1416                raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1417        new_samples = []
1418        w_deltas = {}
1419        for name in sorted(obs[i].names):
1420            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1421            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1422        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1423
1424        if kwargs.get('all_configs'):
1425            new_weight = weight
1426        else:
1427            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1428
1429        result.append(tmp_obs / new_weight)
1430        result[-1].reweighted = True
1431
1432    return result
1433
1434
1435def correlate(obs_a, obs_b):
1436    """Correlate two observables.
1437
1438    Parameters
1439    ----------
1440    obs_a : Obs
1441        First observable
1442    obs_b : Obs
1443        Second observable
1444
1445    Notes
1446    -----
1447    Keep in mind to only correlate primary observables which have not been reweighted
1448    yet. The reweighting has to be applied after correlating the observables.
1449    Only works if a single ensemble is present in the Obs.
1450    Currently only works if ensemble content is identical (this is not strictly necessary).
1451    """
1452
1453    if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1454        raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
1455    if sorted(obs_a.names) != sorted(obs_b.names):
1456        raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1457    if len(obs_a.cov_names) or len(obs_b.cov_names):
1458        raise ValueError('Error: Not possible to correlate Obs that contain covobs!')
1459    for name in obs_a.names:
1460        if obs_a.shape[name] != obs_b.shape[name]:
1461            raise ValueError('Shapes of ensemble', name, 'do not fit')
1462        if obs_a.idl[name] != obs_b.idl[name]:
1463            raise ValueError('idl of ensemble', name, 'do not fit')
1464
1465    if obs_a.reweighted is True:
1466        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1467    if obs_b.reweighted is True:
1468        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1469
1470    new_samples = []
1471    new_idl = []
1472    for name in sorted(obs_a.names):
1473        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1474        new_idl.append(obs_a.idl[name])
1475
1476    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1477    o.reweighted = obs_a.reweighted or obs_b.reweighted
1478    return o
1479
1480
1481def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1482    r'''Calculates the error covariance matrix of a set of observables.
1483
1484    WARNING: This function should be used with care, especially for observables with support on multiple
1485             ensembles with differing autocorrelations. See the notes below for details.
1486
1487    The gamma method has to be applied first to all observables.
1488
1489    Parameters
1490    ----------
1491    obs : list or numpy.ndarray
1492        List or one dimensional array of Obs
1493    visualize : bool
1494        If True plots the corresponding normalized correlation matrix (default False).
1495    correlation : bool
1496        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1497    smooth : None or int
1498        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1499        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1500        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1501        small ones.
1502
1503    Notes
1504    -----
1505    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1506    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1507    in the absence of autocorrelation.
1508    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1509    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1510    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1511    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1512    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1513    '''
1514
1515    length = len(obs)
1516
1517    max_samples = np.max([o.N for o in obs])
1518    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1519        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1520
1521    cov = np.zeros((length, length))
1522    for i in range(length):
1523        for j in range(i, length):
1524            cov[i, j] = _covariance_element(obs[i], obs[j])
1525    cov = cov + cov.T - np.diag(np.diag(cov))
1526
1527    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1528
1529    if isinstance(smooth, int):
1530        corr = _smooth_eigenvalues(corr, smooth)
1531
1532    if visualize:
1533        plt.matshow(corr, vmin=-1, vmax=1)
1534        plt.set_cmap('RdBu')
1535        plt.colorbar()
1536        plt.draw()
1537
1538    if correlation is True:
1539        return corr
1540
1541    errors = [o.dvalue for o in obs]
1542    cov = np.diag(errors) @ corr @ np.diag(errors)
1543
1544    eigenvalues = np.linalg.eigh(cov)[0]
1545    if not np.all(eigenvalues >= 0):
1546        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1547
1548    return cov
1549
1550
1551def invert_corr_cov_cholesky(corr, inverrdiag):
1552    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1553       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1554
1555    Parameters
1556    ----------
1557    corr : np.ndarray
1558           correlation matrix
1559    inverrdiag : np.ndarray
1560              diagonal matrix, the entries are the inverse errors of the data points considered
1561    """
1562
1563    condn = np.linalg.cond(corr)
1564    if condn > 0.1 / np.finfo(float).eps:
1565        raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1566    if condn > 1e13:
1567        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
1568    chol = np.linalg.cholesky(corr)
1569    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1570
1571    return chol_inv
1572
1573
1574def sort_corr(corr, kl, yd):
1575    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1576
1577    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1578    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1579    that the correlation matrix is based on.
1580    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1581    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1582    to the y data `yd` when arranged in an alphabetical order by its keys.
1583
1584    Parameters
1585    ----------
1586    corr : np.ndarray
1587        A square correlation matrix constructed using the order of the y data specified by `kl`.
1588        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1589    kl : list of str
1590        A list of keys that denotes the order in which the y data from `yd` was used to build the
1591        input correlation matrix `corr`.
1592    yd : dict of list
1593        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1594        y data points. The total number of y data points across all keys must match the dimensions
1595        of `corr`. The lists in the dictionary can be lists of Obs.
1596
1597    Returns
1598    -------
1599    np.ndarray
1600        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1601
1602    Example
1603    -------
1604    >>> import numpy as np
1605    >>> import pyerrors as pe
1606    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1607    >>> kl = ['b', 'a']
1608    >>> yd = {'a': [1, 2], 'b': [3]}
1609    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1610    >>> print(sorted_corr)
1611    array([[1. , 0.3, 0.4],
1612           [0.3, 1. , 0.2],
1613           [0.4, 0.2, 1. ]])
1614
1615    """
1616    kl_sorted = sorted(kl)
1617
1618    posd = {}
1619    ofs = 0
1620    for ki, k in enumerate(kl):
1621        posd[k] = [i + ofs for i in range(len(yd[k]))]
1622        ofs += len(posd[k])
1623
1624    mapping = []
1625    for k in kl_sorted:
1626        for i in range(len(yd[k])):
1627            mapping.append(posd[k][i])
1628
1629    corr_sorted = np.zeros_like(corr)
1630    for i in range(corr.shape[0]):
1631        for j in range(corr.shape[0]):
1632            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1633
1634    return corr_sorted
1635
1636
1637def _smooth_eigenvalues(corr, E):
1638    """Eigenvalue smoothing as described in hep-lat/9412087
1639
1640    corr : np.ndarray
1641        correlation matrix
1642    E : integer
1643        Number of eigenvalues to be left substantially unchanged
1644    """
1645    if not (2 < E < corr.shape[0] - 1):
1646        raise ValueError(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1647    vals, vec = np.linalg.eigh(corr)
1648    lambda_min = np.mean(vals[:-E])
1649    vals[vals < lambda_min] = lambda_min
1650    vals /= np.mean(vals)
1651    return vec @ np.diag(vals) @ vec.T
1652
1653
1654def _covariance_element(obs1, obs2):
1655    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1656
1657    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1658        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1659        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1660        return np.sum(deltas1 * deltas2)
1661
1662    if set(obs1.names).isdisjoint(set(obs2.names)):
1663        return 0.0
1664
1665    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1666        raise Exception('The gamma method has to be applied to both Obs first.')
1667
1668    dvalue = 0.0
1669
1670    for e_name in obs1.mc_names:
1671
1672        if e_name not in obs2.mc_names:
1673            continue
1674
1675        idl_d = {}
1676        for r_name in obs1.e_content[e_name]:
1677            if r_name not in obs2.e_content[e_name]:
1678                continue
1679            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1680
1681        gamma = 0.0
1682
1683        for r_name in obs1.e_content[e_name]:
1684            if r_name not in obs2.e_content[e_name]:
1685                continue
1686            if len(idl_d[r_name]) == 0:
1687                continue
1688            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1689
1690        if gamma == 0.0:
1691            continue
1692
1693        gamma_div = 0.0
1694        for r_name in obs1.e_content[e_name]:
1695            if r_name not in obs2.e_content[e_name]:
1696                continue
1697            if len(idl_d[r_name]) == 0:
1698                continue
1699            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1700        gamma /= gamma_div
1701
1702        dvalue += gamma
1703
1704    for e_name in obs1.cov_names:
1705
1706        if e_name not in obs2.cov_names:
1707            continue
1708
1709        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1710
1711    return dvalue
1712
1713
1714def import_jackknife(jacks, name, idl=None):
1715    """Imports jackknife samples and returns an Obs
1716
1717    Parameters
1718    ----------
1719    jacks : numpy.ndarray
1720        numpy array containing the mean value as zeroth entry and
1721        the N jackknife samples as first to Nth entry.
1722    name : str
1723        name of the ensemble the samples are defined on.
1724    """
1725    length = len(jacks) - 1
1726    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1727    samples = jacks[1:] @ prj
1728    mean = np.mean(samples)
1729    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1730    new_obs._value = jacks[0]
1731    return new_obs
1732
1733
1734def import_bootstrap(boots, name, random_numbers):
1735    """Imports bootstrap samples and returns an Obs
1736
1737    Parameters
1738    ----------
1739    boots : numpy.ndarray
1740        numpy array containing the mean value as zeroth entry and
1741        the N bootstrap samples as first to Nth entry.
1742    name : str
1743        name of the ensemble the samples are defined on.
1744    random_numbers : np.ndarray
1745        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1746        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1747        chain to be reconstructed.
1748    """
1749    samples, length = random_numbers.shape
1750    if samples != len(boots) - 1:
1751        raise ValueError("Random numbers do not have the correct shape.")
1752
1753    if samples < length:
1754        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1755
1756    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1757
1758    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1759    ret = Obs([samples], [name])
1760    ret._value = boots[0]
1761    return ret
1762
1763
1764def merge_obs(list_of_obs):
1765    """Combine all observables in list_of_obs into one new observable.
1766    This allows to merge Obs that have been computed on multiple replica
1767    of the same ensemble.
1768    If you like to merge Obs that are based on several ensembles, please
1769    average them yourself.
1770
1771    Parameters
1772    ----------
1773    list_of_obs : list
1774        list of the Obs object to be combined
1775
1776    Notes
1777    -----
1778    It is not possible to combine obs which are based on the same replicum
1779    """
1780    replist = [item for obs in list_of_obs for item in obs.names]
1781    if (len(replist) == len(set(replist))) is False:
1782        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
1783    if any([len(o.cov_names) for o in list_of_obs]):
1784        raise ValueError('Not possible to merge data that contains covobs!')
1785    new_dict = {}
1786    idl_dict = {}
1787    for o in list_of_obs:
1788        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1789                        for key in set(o.deltas) | set(o.r_values)})
1790        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1791
1792    names = sorted(new_dict.keys())
1793    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1794    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1795    return o
1796
1797
1798def cov_Obs(means, cov, name, grad=None):
1799    """Create an Obs based on mean(s) and a covariance matrix
1800
1801    Parameters
1802    ----------
1803    mean : list of floats or float
1804        N mean value(s) of the new Obs
1805    cov : list or array
1806        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1807    name : str
1808        identifier for the covariance matrix
1809    grad : list or array
1810        Gradient of the Covobs wrt. the means belonging to cov.
1811    """
1812
1813    def covobs_to_obs(co):
1814        """Make an Obs out of a Covobs
1815
1816        Parameters
1817        ----------
1818        co : Covobs
1819            Covobs to be embedded into the Obs
1820        """
1821        o = Obs([], [], means=[])
1822        o._value = co.value
1823        o.names.append(co.name)
1824        o._covobs[co.name] = co
1825        o._dvalue = np.sqrt(co.errsq())
1826        return o
1827
1828    ol = []
1829    if isinstance(means, (float, int)):
1830        means = [means]
1831
1832    for i in range(len(means)):
1833        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1834    if ol[0].covobs[name].N != len(means):
1835        raise ValueError('You have to provide %d mean values!' % (ol[0].N))
1836    if len(ol) == 1:
1837        return ol[0]
1838    return ol
1839
1840
1841def _determine_gap(o, e_content, e_name):
1842    gaps = []
1843    for r_name in e_content[e_name]:
1844        if isinstance(o.idl[r_name], range):
1845            gaps.append(o.idl[r_name].step)
1846        else:
1847            gaps.append(np.min(np.diff(o.idl[r_name])))
1848
1849    gap = min(gaps)
1850    if not np.all([gi % gap == 0 for gi in gaps]):
1851        raise ValueError(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1852
1853    return gap
1854
1855
1856def _check_lists_equal(idl):
1857    '''
1858    Use groupby to efficiently check whether all elements of idl are identical.
1859    Returns True if all elements are equal, otherwise False.
1860
1861    Parameters
1862    ----------
1863    idl : list of lists, ranges or np.ndarrays
1864    '''
1865    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1866    if next(g, True) and not next(g, False):
1867        return True
1868    return False

class CObs: View Source

 919class CObs:
 920    """Class for a complex valued observable."""
 921    __slots__ = ['_real', '_imag', 'tag']
 922
 923    def __init__(self, real, imag=0.0):
 924        self._real = real
 925        self._imag = imag
 926        self.tag = None
 927
 928    @property
 929    def real(self):
 930        return self._real
 931
 932    @property
 933    def imag(self):
 934        return self._imag
 935
 936    def gamma_method(self, **kwargs):
 937        """Executes the gamma_method for the real and the imaginary part."""
 938        if isinstance(self.real, Obs):
 939            self.real.gamma_method(**kwargs)
 940        if isinstance(self.imag, Obs):
 941            self.imag.gamma_method(**kwargs)
 942
 943    def is_zero(self):
 944        """Checks whether both real and imaginary part are zero within machine precision."""
 945        return self.real == 0.0 and self.imag == 0.0
 946
 947    def conjugate(self):
 948        return CObs(self.real, -self.imag)
 949
 950    def __add__(self, other):
 951        if isinstance(other, np.ndarray):
 952            return other + self
 953        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 954            return CObs(self.real + other.real,
 955                        self.imag + other.imag)
 956        else:
 957            return CObs(self.real + other, self.imag)
 958
 959    def __radd__(self, y):
 960        return self + y
 961
 962    def __sub__(self, other):
 963        if isinstance(other, np.ndarray):
 964            return -1 * (other - self)
 965        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 966            return CObs(self.real - other.real, self.imag - other.imag)
 967        else:
 968            return CObs(self.real - other, self.imag)
 969
 970    def __rsub__(self, other):
 971        return -1 * (self - other)
 972
 973    def __mul__(self, other):
 974        if isinstance(other, np.ndarray):
 975            return other * self
 976        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 977            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 978                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 979                                               [self.real, other.real, self.imag, other.imag],
 980                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 981                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 982                                               [self.real, other.real, self.imag, other.imag],
 983                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 984            elif getattr(other, 'imag', 0) != 0:
 985                return CObs(self.real * other.real - self.imag * other.imag,
 986                            self.imag * other.real + self.real * other.imag)
 987            else:
 988                return CObs(self.real * other.real, self.imag * other.real)
 989        else:
 990            return CObs(self.real * other, self.imag * other)
 991
 992    def __rmul__(self, other):
 993        return self * other
 994
 995    def __truediv__(self, other):
 996        if isinstance(other, np.ndarray):
 997            return 1 / (other / self)
 998        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 999            r = other.real ** 2 + other.imag ** 2
1000            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1001        else:
1002            return CObs(self.real / other, self.imag / other)
1003
1004    def __rtruediv__(self, other):
1005        r = self.real ** 2 + self.imag ** 2
1006        if hasattr(other, 'real') and hasattr(other, 'imag'):
1007            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1008        else:
1009            return CObs(self.real * other / r, -self.imag * other / r)
1010
1011    def __abs__(self):
1012        return np.sqrt(self.real**2 + self.imag**2)
1013
1014    def __pos__(self):
1015        return self
1016
1017    def __neg__(self):
1018        return -1 * self
1019
1020    def __eq__(self, other):
1021        return self.real == other.real and self.imag == other.imag
1022
1023    def __str__(self):
1024        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1025
1026    def __repr__(self):
1027        return 'CObs[' + str(self) + ']'
1028
1029    def __format__(self, format_type):
1030        if format_type == "":
1031            significance = 2
1032            format_type = "2"
1033        else:
1034            significance = int(float(format_type.replace("+", "").replace("-", "")))
1035        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

923    def __init__(self, real, imag=0.0):
924        self._real = real
925        self._imag = imag
926        self.tag = None

tag

real View Source

928    @property
929    def real(self):
930        return self._real

imag View Source

932    @property
933    def imag(self):
934        return self._imag

def gamma_method(self, **kwargs): View Source

936    def gamma_method(self, **kwargs):
937        """Executes the gamma_method for the real and the imaginary part."""
938        if isinstance(self.real, Obs):
939            self.real.gamma_method(**kwargs)
940        if isinstance(self.imag, Obs):
941            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

943    def is_zero(self):
944        """Checks whether both real and imaginary part are zero within machine precision."""
945        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

947    def conjugate(self):
948        return CObs(self.real, -self.imag)

def gamma_method(x, **kwargs): View Source

1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs): View Source

1038def gamma_method(x, **kwargs):
1039    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1040
1041    See docstring of pe.Obs.gamma_method for details.
1042    """
1043    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1173def derived_observable(func, data, array_mode=False, **kwargs):
1174    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1175
1176    Parameters
1177    ----------
1178    func : object
1179        arbitrary function of the form func(data, **kwargs). For the
1180        automatic differentiation to work, all numpy functions have to have
1181        the autograd wrapper (use 'import autograd.numpy as anp').
1182    data : list
1183        list of Obs, e.g. [obs1, obs2, obs3].
1184    num_grad : bool
1185        if True, numerical derivatives are used instead of autograd
1186        (default False). To control the numerical differentiation the
1187        kwargs of numdifftools.step_generators.MaxStepGenerator
1188        can be used.
1189    man_grad : list
1190        manually supply a list or an array which contains the jacobian
1191        of func. Use cautiously, supplying the wrong derivative will
1192        not be intercepted.
1193
1194    Notes
1195    -----
1196    For simple mathematical operations it can be practical to use anonymous
1197    functions. For the ratio of two observables one can e.g. use
1198
1199    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1200    """
1201
1202    data = np.asarray(data)
1203    raveled_data = data.ravel()
1204
1205    # Workaround for matrix operations containing non Obs data
1206    if not all(isinstance(x, Obs) for x in raveled_data):
1207        for i in range(len(raveled_data)):
1208            if isinstance(raveled_data[i], (int, float)):
1209                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1210
1211    allcov = {}
1212    for o in raveled_data:
1213        for name in o.cov_names:
1214            if name in allcov:
1215                if not np.allclose(allcov[name], o.covobs[name].cov):
1216                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1217            else:
1218                allcov[name] = o.covobs[name].cov
1219
1220    n_obs = len(raveled_data)
1221    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1222    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1223    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1224
1225    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1226
1227    if data.ndim == 1:
1228        values = np.array([o.value for o in data])
1229    else:
1230        values = np.vectorize(lambda x: x.value)(data)
1231
1232    new_values = func(values, **kwargs)
1233
1234    multi = int(isinstance(new_values, np.ndarray))
1235
1236    new_r_values = {}
1237    new_idl_d = {}
1238    for name in new_sample_names:
1239        idl = []
1240        tmp_values = np.zeros(n_obs)
1241        for i, item in enumerate(raveled_data):
1242            tmp_values[i] = item.r_values.get(name, item.value)
1243            tmp_idl = item.idl.get(name)
1244            if tmp_idl is not None:
1245                idl.append(tmp_idl)
1246        if multi > 0:
1247            tmp_values = np.array(tmp_values).reshape(data.shape)
1248        new_r_values[name] = func(tmp_values, **kwargs)
1249        new_idl_d[name] = _merge_idx(idl)
1250
1251    def _compute_scalefactor_missing_rep(obs):
1252        """
1253        Computes the scale factor that is to be multiplied with the deltas
1254        in the case where Obs with different subsets of replica are merged.
1255        Returns a dictionary with the scale factor for each Monte Carlo name.
1256
1257        Parameters
1258        ----------
1259        obs : Obs
1260            The observable corresponding to the deltas that are to be scaled
1261        """
1262        scalef_d = {}
1263        for mc_name in obs.mc_names:
1264            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1265            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1266            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1267                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1268        return scalef_d
1269
1270    if 'man_grad' in kwargs:
1271        deriv = np.asarray(kwargs.get('man_grad'))
1272        if new_values.shape + data.shape != deriv.shape:
1273            raise ValueError('Manual derivative does not have correct shape.')
1274    elif kwargs.get('num_grad') is True:
1275        if multi > 0:
1276            raise Exception('Multi mode currently not supported for numerical derivative')
1277        options = {
1278            'base_step': 0.1,
1279            'step_ratio': 2.5}
1280        for key in options.keys():
1281            kwarg = kwargs.get(key)
1282            if kwarg is not None:
1283                options[key] = kwarg
1284        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1285        if tmp_df.size == 1:
1286            deriv = np.array([tmp_df.real])
1287        else:
1288            deriv = tmp_df.real
1289    else:
1290        deriv = jacobian(func)(values, **kwargs)
1291
1292    final_result = np.zeros(new_values.shape, dtype=object)
1293
1294    if array_mode is True:
1295
1296        class _Zero_grad():
1297            def __init__(self, N):
1298                self.grad = np.zeros((N, 1))
1299
1300        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1301        d_extracted = {}
1302        g_extracted = {}
1303        for name in new_sample_names:
1304            d_extracted[name] = []
1305            ens_length = len(new_idl_d[name])
1306            for i_dat, dat in enumerate(data):
1307                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1308        for name in new_cov_names:
1309            g_extracted[name] = []
1310            zero_grad = _Zero_grad(new_covobs_lengths[name])
1311            for i_dat, dat in enumerate(data):
1312                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1313
1314    for i_val, new_val in np.ndenumerate(new_values):
1315        new_deltas = {}
1316        new_grad = {}
1317        if array_mode is True:
1318            for name in new_sample_names:
1319                ens_length = d_extracted[name][0].shape[-1]
1320                new_deltas[name] = np.zeros(ens_length)
1321                for i_dat, dat in enumerate(d_extracted[name]):
1322                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1323            for name in new_cov_names:
1324                new_grad[name] = 0
1325                for i_dat, dat in enumerate(g_extracted[name]):
1326                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1327        else:
1328            for j_obs, obs in np.ndenumerate(data):
1329                scalef_d = _compute_scalefactor_missing_rep(obs)
1330                for name in obs.names:
1331                    if name in obs.cov_names:
1332                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1333                    else:
1334                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1335
1336        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1337
1338        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1339            raise ValueError('The same name has been used for deltas and covobs!')
1340        new_samples = []
1341        new_means = []
1342        new_idl = []
1343        new_names_obs = []
1344        for name in new_names:
1345            if name not in new_covobs:
1346                new_samples.append(new_deltas[name])
1347                new_idl.append(new_idl_d[name])
1348                new_means.append(new_r_values[name][i_val])
1349                new_names_obs.append(name)
1350        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1351        for name in new_covobs:
1352            final_result[i_val].names.append(name)
1353        final_result[i_val]._covobs = new_covobs
1354        final_result[i_val]._value = new_val
1355        final_result[i_val].reweighted = reweighted
1356
1357    if multi == 0:
1358        final_result = final_result.item()
1359
1360    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

1392def reweight(weight, obs, **kwargs):
1393    """Reweight a list of observables.
1394
1395    Parameters
1396    ----------
1397    weight : Obs
1398        Reweighting factor. An Observable that has to be defined on a superset of the
1399        configurations in obs[i].idl for all i.
1400    obs : list
1401        list of Obs, e.g. [obs1, obs2, obs3].
1402    all_configs : bool
1403        if True, the reweighted observables are normalized by the average of
1404        the reweighting factor on all configurations in weight.idl and not
1405        on the configurations in obs[i].idl. Default False.
1406    """
1407    result = []
1408    for i in range(len(obs)):
1409        if len(obs[i].cov_names):
1410            raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
1411        if not set(obs[i].names).issubset(weight.names):
1412            raise ValueError('Error: Ensembles do not fit')
1413        if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1414            raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
1415        for name in obs[i].names:
1416            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1417                raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1418        new_samples = []
1419        w_deltas = {}
1420        for name in sorted(obs[i].names):
1421            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1422            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1423        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1424
1425        if kwargs.get('all_configs'):
1426            new_weight = weight
1427        else:
1428            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1429
1430        result.append(tmp_obs / new_weight)
1431        result[-1].reweighted = True
1432
1433    return result

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

1436def correlate(obs_a, obs_b):
1437    """Correlate two observables.
1438
1439    Parameters
1440    ----------
1441    obs_a : Obs
1442        First observable
1443    obs_b : Obs
1444        Second observable
1445
1446    Notes
1447    -----
1448    Keep in mind to only correlate primary observables which have not been reweighted
1449    yet. The reweighting has to be applied after correlating the observables.
1450    Only works if a single ensemble is present in the Obs.
1451    Currently only works if ensemble content is identical (this is not strictly necessary).
1452    """
1453
1454    if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1455        raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
1456    if sorted(obs_a.names) != sorted(obs_b.names):
1457        raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1458    if len(obs_a.cov_names) or len(obs_b.cov_names):
1459        raise ValueError('Error: Not possible to correlate Obs that contain covobs!')
1460    for name in obs_a.names:
1461        if obs_a.shape[name] != obs_b.shape[name]:
1462            raise ValueError('Shapes of ensemble', name, 'do not fit')
1463        if obs_a.idl[name] != obs_b.idl[name]:
1464            raise ValueError('idl of ensemble', name, 'do not fit')
1465
1466    if obs_a.reweighted is True:
1467        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1468    if obs_b.reweighted is True:
1469        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1470
1471    new_samples = []
1472    new_idl = []
1473    for name in sorted(obs_a.names):
1474        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1475        new_idl.append(obs_a.idl[name])
1476
1477    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1478    o.reweighted = obs_a.reweighted or obs_b.reweighted
1479    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Only works if a single ensemble is present in the Obs. Currently only works if ensemble content is identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

1482def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1483    r'''Calculates the error covariance matrix of a set of observables.
1484
1485    WARNING: This function should be used with care, especially for observables with support on multiple
1486             ensembles with differing autocorrelations. See the notes below for details.
1487
1488    The gamma method has to be applied first to all observables.
1489
1490    Parameters
1491    ----------
1492    obs : list or numpy.ndarray
1493        List or one dimensional array of Obs
1494    visualize : bool
1495        If True plots the corresponding normalized correlation matrix (default False).
1496    correlation : bool
1497        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1498    smooth : None or int
1499        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1500        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1501        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1502        small ones.
1503
1504    Notes
1505    -----
1506    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1507    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1508    in the absence of autocorrelation.
1509    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1510    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1511    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1512    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1513    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1514    '''
1515
1516    length = len(obs)
1517
1518    max_samples = np.max([o.N for o in obs])
1519    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1520        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1521
1522    cov = np.zeros((length, length))
1523    for i in range(length):
1524        for j in range(i, length):
1525            cov[i, j] = _covariance_element(obs[i], obs[j])
1526    cov = cov + cov.T - np.diag(np.diag(cov))
1527
1528    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1529
1530    if isinstance(smooth, int):
1531        corr = _smooth_eigenvalues(corr, smooth)
1532
1533    if visualize:
1534        plt.matshow(corr, vmin=-1, vmax=1)
1535        plt.set_cmap('RdBu')
1536        plt.colorbar()
1537        plt.draw()
1538
1539    if correlation is True:
1540        return corr
1541
1542    errors = [o.dvalue for o in obs]
1543    cov = np.diag(errors) @ corr @ np.diag(errors)
1544
1545    eigenvalues = np.linalg.eigh(cov)[0]
1546    if not np.all(eigenvalues >= 0):
1547        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1548
1549    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def invert_corr_cov_cholesky(corr, inverrdiag): View Source

1552def invert_corr_cov_cholesky(corr, inverrdiag):
1553    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1554       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1555
1556    Parameters
1557    ----------
1558    corr : np.ndarray
1559           correlation matrix
1560    inverrdiag : np.ndarray
1561              diagonal matrix, the entries are the inverse errors of the data points considered
1562    """
1563
1564    condn = np.linalg.cond(corr)
1565    if condn > 0.1 / np.finfo(float).eps:
1566        raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1567    if condn > 1e13:
1568        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
1569    chol = np.linalg.cholesky(corr)
1570    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1571
1572    return chol_inv

Constructs a lower triangular matrix chol via the Cholesky decomposition of the correlation matrix corr and then returns the inverse covariance matrix chol_inv as a lower triangular matrix by solving chol * x = inverrdiag.

Parameters

corr (np.ndarray): correlation matrix
inverrdiag (np.ndarray): diagonal matrix, the entries are the inverse errors of the data points considered

def sort_corr(corr, kl, yd): View Source

1575def sort_corr(corr, kl, yd):
1576    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1577
1578    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1579    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1580    that the correlation matrix is based on.
1581    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1582    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1583    to the y data `yd` when arranged in an alphabetical order by its keys.
1584
1585    Parameters
1586    ----------
1587    corr : np.ndarray
1588        A square correlation matrix constructed using the order of the y data specified by `kl`.
1589        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1590    kl : list of str
1591        A list of keys that denotes the order in which the y data from `yd` was used to build the
1592        input correlation matrix `corr`.
1593    yd : dict of list
1594        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1595        y data points. The total number of y data points across all keys must match the dimensions
1596        of `corr`. The lists in the dictionary can be lists of Obs.
1597
1598    Returns
1599    -------
1600    np.ndarray
1601        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1602
1603    Example
1604    -------
1605    >>> import numpy as np
1606    >>> import pyerrors as pe
1607    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1608    >>> kl = ['b', 'a']
1609    >>> yd = {'a': [1, 2], 'b': [3]}
1610    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1611    >>> print(sorted_corr)
1612    array([[1. , 0.3, 0.4],
1613           [0.3, 1. , 0.2],
1614           [0.4, 0.2, 1. ]])
1615
1616    """
1617    kl_sorted = sorted(kl)
1618
1619    posd = {}
1620    ofs = 0
1621    for ki, k in enumerate(kl):
1622        posd[k] = [i + ofs for i in range(len(yd[k]))]
1623        ofs += len(posd[k])
1624
1625    mapping = []
1626    for k in kl_sorted:
1627        for i in range(len(yd[k])):
1628            mapping.append(posd[k][i])
1629
1630    corr_sorted = np.zeros_like(corr)
1631    for i in range(corr.shape[0]):
1632        for j in range(corr.shape[0]):
1633            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1634
1635    return corr_sorted

Reorders a correlation matrix to match the alphabetical order of its underlying y data.

The ordering of the input correlation matrix corr is given by the list of keys kl. The input dictionary yd (with the same keys kl) must contain the corresponding y data that the correlation matrix is based on. This function sorts the list of keys kl alphabetically and sorts the matrix corr according to this alphabetical order such that the sorted matrix corr_sorted corresponds to the y data yd when arranged in an alphabetical order by its keys.

Parameters

corr (np.ndarray): A square correlation matrix constructed using the order of the y data specified by kl. The dimensions of corr should match the total number of y data points in yd combined.
kl (list of str): A list of keys that denotes the order in which the y data from yd was used to build the input correlation matrix corr.
yd (dict of list): A dictionary where each key corresponds to a unique identifier, and its value is a list of y data points. The total number of y data points across all keys must match the dimensions of corr. The lists in the dictionary can be lists of Obs.

Returns

np.ndarray: A new, sorted correlation matrix that corresponds to the y data from yd when arranged alphabetically by its keys.

Example

>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
       [0.3, 1. , 0.2],
       [0.4, 0.2, 1. ]])

def import_jackknife(jacks, name, idl=None): View Source

1715def import_jackknife(jacks, name, idl=None):
1716    """Imports jackknife samples and returns an Obs
1717
1718    Parameters
1719    ----------
1720    jacks : numpy.ndarray
1721        numpy array containing the mean value as zeroth entry and
1722        the N jackknife samples as first to Nth entry.
1723    name : str
1724        name of the ensemble the samples are defined on.
1725    """
1726    length = len(jacks) - 1
1727    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1728    samples = jacks[1:] @ prj
1729    mean = np.mean(samples)
1730    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1731    new_obs._value = jacks[0]
1732    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.

def import_bootstrap(boots, name, random_numbers): View Source

1735def import_bootstrap(boots, name, random_numbers):
1736    """Imports bootstrap samples and returns an Obs
1737
1738    Parameters
1739    ----------
1740    boots : numpy.ndarray
1741        numpy array containing the mean value as zeroth entry and
1742        the N bootstrap samples as first to Nth entry.
1743    name : str
1744        name of the ensemble the samples are defined on.
1745    random_numbers : np.ndarray
1746        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1747        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1748        chain to be reconstructed.
1749    """
1750    samples, length = random_numbers.shape
1751    if samples != len(boots) - 1:
1752        raise ValueError("Random numbers do not have the correct shape.")
1753
1754    if samples < length:
1755        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1756
1757    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1758
1759    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1760    ret = Obs([samples], [name])
1761    ret._value = boots[0]
1762    return ret

Imports bootstrap samples and returns an Obs

Parameters

boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.
random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.

def merge_obs(list_of_obs): View Source

1765def merge_obs(list_of_obs):
1766    """Combine all observables in list_of_obs into one new observable.
1767    This allows to merge Obs that have been computed on multiple replica
1768    of the same ensemble.
1769    If you like to merge Obs that are based on several ensembles, please
1770    average them yourself.
1771
1772    Parameters
1773    ----------
1774    list_of_obs : list
1775        list of the Obs object to be combined
1776
1777    Notes
1778    -----
1779    It is not possible to combine obs which are based on the same replicum
1780    """
1781    replist = [item for obs in list_of_obs for item in obs.names]
1782    if (len(replist) == len(set(replist))) is False:
1783        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
1784    if any([len(o.cov_names) for o in list_of_obs]):
1785        raise ValueError('Not possible to merge data that contains covobs!')
1786    new_dict = {}
1787    idl_dict = {}
1788    for o in list_of_obs:
1789        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1790                        for key in set(o.deltas) | set(o.r_values)})
1791        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1792
1793    names = sorted(new_dict.keys())
1794    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1795    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1796    return o

Combine all observables in list_of_obs into one new observable. This allows to merge Obs that have been computed on multiple replica of the same ensemble. If you like to merge Obs that are based on several ensembles, please average them yourself.

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

1799def cov_Obs(means, cov, name, grad=None):
1800    """Create an Obs based on mean(s) and a covariance matrix
1801
1802    Parameters
1803    ----------
1804    mean : list of floats or float
1805        N mean value(s) of the new Obs
1806    cov : list or array
1807        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1808    name : str
1809        identifier for the covariance matrix
1810    grad : list or array
1811        Gradient of the Covobs wrt. the means belonging to cov.
1812    """
1813
1814    def covobs_to_obs(co):
1815        """Make an Obs out of a Covobs
1816
1817        Parameters
1818        ----------
1819        co : Covobs
1820            Covobs to be embedded into the Obs
1821        """
1822        o = Obs([], [], means=[])
1823        o._value = co.value
1824        o.names.append(co.name)
1825        o._covobs[co.name] = co
1826        o._dvalue = np.sqrt(co.errsq())
1827        return o
1828
1829    ol = []
1830    if isinstance(means, (float, int)):
1831        means = [means]
1832
1833    for i in range(len(means)):
1834        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1835    if ol[0].covobs[name].N != len(means):
1836        raise ValueError('You have to provide %d mean values!' % (ol[0].N))
1837    if len(ol) == 1:
1838        return ol[0]
1839    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

mean (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.