pyerrors.obs

import warnings
import hashlib
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
import scipy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and a second list of the same length which
    contains the names of the ensembles/replica. Mathematical operations can
    be performed on instances. The result is another instance of Obs. The
    error of an instance can be computed with the gamma_method. The class
    also contains additional methods for output and visualization of the
    error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
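
        Examples
        --------
        A minimal usage sketch; the sample values below are hypothetical and
        chosen purely for illustration:

        >>> import numpy as np
        >>> samples = np.random.normal(1.0, 0.1, 100)
        >>> my_obs = Obs([samples], ['ensemble1'])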
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
 108                    elif np.any(dc == 0):
 109                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
 110                    if len(dc) == 1:
 111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 112                    else:
 113                        self.idl[name] = list(idx)
 114                else:
 115                    raise TypeError('incompatible type for idl[%s].' % (name))
 116        else:
 117            for name, sample in sorted(zip(names, samples)):
 118                self.idl[name] = range(1, len(sample) + 1)
 119
 120        if kwargs.get("means") is not None:
 121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                self.r_values[name] = mean
 125                self.deltas[name] = sample
 126        else:
 127            for name, sample in sorted(zip(names, samples)):
 128                self.shape[name] = len(self.idl[name])
 129                self.N += self.shape[name]
 130                if len(sample) != self.shape[name]:
 131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 132                self.r_values[name] = np.mean(sample)
 133                self.deltas[name] = sample - self.r_values[name]
 134                self._value += self.shape[name] * self.r_values[name]
 135            self._value /= self.N
 136
 137        self._dvalue = 0.0
 138        self.ddvalue = 0.0
 139        self.reweighted = False
 140
 141        self.tag = None
 142
    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1.0).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
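
        Examples
        --------
        A usage sketch; 'my_obs' is assumed to be an existing Obs instance:

        >>> my_obs.gamma_method()  # defaults: S=2.0, no exponential tail
        >>> my_obs.gamma_method(S=3.0)  # custom S for all ensembles
        >>> my_obs.gamma_method(tau_exp=10)  # critical slowing down analysis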
 195        """
 196
 197        e_content = self.e_content
 198        self.e_dvalue = {}
 199        self.e_ddvalue = {}
 200        self.e_tauint = {}
 201        self.e_dtauint = {}
 202        self.e_windowsize = {}
 203        self.e_n_tauint = {}
 204        self.e_n_dtauint = {}
 205        e_gamma = {}
 206        self.e_rho = {}
 207        self.e_drho = {}
 208        self._dvalue = 0
 209        self.ddvalue = 0
 210
 211        self.S = {}
 212        self.tau_exp = {}
 213        self.N_sigma = {}
 214
 215        if kwargs.get('fft') is False:
 216            fft = False
 217        else:
 218            fft = True
 219
 220        def _parse_kwarg(kwarg_name):
 221            if kwarg_name in kwargs:
 222                tmp = kwargs.get(kwarg_name)
 223                if isinstance(tmp, (int, float)):
 224                    if tmp < 0:
 225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 226                    for e, e_name in enumerate(self.e_names):
 227                        getattr(self, kwarg_name)[e_name] = tmp
 228                else:
 229                    raise TypeError(kwarg_name + ' is not in proper format.')
 230            else:
 231                for e, e_name in enumerate(self.e_names):
 232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 234                    else:
 235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 236
 237        _parse_kwarg('S')
 238        _parse_kwarg('tau_exp')
 239        _parse_kwarg('N_sigma')
 240
 241        for e, e_name in enumerate(self.mc_names):
 242            gapsize = _determine_gap(self, e_content, e_name)
 243
 244            r_length = []
 245            for r_name in e_content[e_name]:
 246                if isinstance(self.idl[r_name], range):
 247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 248                else:
 249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 250
 251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 252            w_max = max(r_length) // 2
 253            e_gamma[e_name] = np.zeros(w_max)
 254            self.e_rho[e_name] = np.zeros(w_max)
 255            self.e_drho[e_name] = np.zeros(w_max)
 256
 257            for r_name in e_content[e_name]:
 258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 259
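            # Normalize Gamma(W) by the number of terms that actually entered the
            # sum at each separation W (relevant for gaps and irregular chains).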
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
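                # Error estimate for the normalized autocorrelation function
                # rho(W), built from the fluctuations of rho at larger separations.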
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
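                    # cf. hep-lat/0306017: the window is chosen at the first
                    # sign change of g(W) = exp(-W/tau) - tau/sqrt(W * N).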
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method

    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
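            # Wiener-Khinchin shortcut: the autocorrelation of the (zero-padded)
            # deltas is the inverse FFT of the squared modulus of their FFT.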
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly when the gamma method has been run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            if specified, the figure is saved to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot the normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            if specified, the figure is saved to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot a pie chart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            if specified, the figure is saved to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(labels, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'name' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
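
        Examples
        --------
        A usage sketch; 'my_obs' is assumed to be an Obs defined on a single
        ensemble and replicum. Entry j corresponds to the mean over all
        configurations except configuration j:

        >>> jacks = my_obs.export_jackknife()
        >>> mean, first_jack = jacks[0], jacks[1]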
 676        """
 677
 678        if len(self.names) != 1:
 679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 680
 681        name = self.names[0]
 682        full_data = self.deltas[name] + self.r_values[name]
 683        n = full_data.size
 684        mean = self.value
 685        tmp_jacks = np.zeros(n + 1)
 686        tmp_jacks[0] = mean
 687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 688        return tmp_jacks
 689
    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
        """Export bootstrap samples from the Obs

        Parameters
        ----------
        samples : int
            Number of bootstrap samples to generate.
        random_numbers : np.ndarray
            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
        save_rng : str
            Save the random numbers to a file if a path is specified.

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length samples + 1. The zeroth entry of the
            array contains the mean value of the Obs, entries 1 to samples contain
            the bootstrap samples derived from the Obs. The current implementation
            only works for observables defined on exactly one ensemble and replicum.
            The derived bootstrap samples should agree with samples from a full
            bootstrap analysis up to O(1/N).
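
        Examples
        --------
        A usage sketch with user-supplied random numbers (the seed is chosen
        arbitrarily for illustration):

        >>> rng = np.random.default_rng(42)
        >>> rn = rng.integers(0, my_obs.N, size=(500, my_obs.N))
        >>> boots = my_obs.export_bootstrap(500, random_numbers=rn)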
 712        """
 713        if len(self.names) != 1:
 714            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 715
 716        name = self.names[0]
 717        length = self.N
 718
 719        if random_numbers is None:
 720            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 721            rng = np.random.default_rng(seed)
 722            random_numbers = rng.integers(0, length, size=(samples, length))
 723
 724        if save_rng is not None:
 725            np.savetxt(save_rng, random_numbers, fmt='%i')
 726
 727        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 728        ret = np.zeros(samples + 1)
 729        ret[0] = self.value
 730        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 731        return ret
 732
    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        my_str = _format_uncertainty(self.value, self._dvalue,
                                     significance=significance)
        for char in ["+", " "]:
            if format_type.startswith(char):
                if my_str[0] != "-":
                    my_str = char + my_str
        return my_str
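
    # Example: f"{my_obs:3}" formats with three significant digits on the error;
    # f"{my_obs:+2}" additionally prefixes positive values with '+'.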

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        if other is None:
            return False
        return (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif isinstance(y, complex):
                return CObs(self, 0) + y
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
        else:
            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])

    def __rpow__(self, y):
        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
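    # A CObs pairs a real and an imaginary part, each typically an Obs, and
    # propagates both through complex arithmetic (see the overloads below).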
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
            format_type = "2"
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"


def gamma_method(x, **kwargs):
    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.

    See docstring of pe.Obs.gamma_method for details.
    """
    return np.vectorize(lambda o: o.gm(**kwargs))(x)


gm = gamma_method


def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
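    # Sketch of the resulting notation (input values chosen for illustration):
    #   _format_uncertainty(13.0235, 0.4534)                  -> '13.02(45)'
    #   _format_uncertainty(0.01234, 0.00056, significance=1) -> '0.0123(6)'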
    if dvalue == 0.0 or (not np.isfinite(dvalue)):
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
       configurations and where holes are filled by 0.
       If idx is of type range, the deltas are not changed if the idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
    """
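    # Sketch: deltas [d1, d2, d3] on idx [1, 3, 7] with gapsize 1 are expanded
    # to [d1, 0, d2, 0, 0, 0, d3] on the regular range 1..7.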
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """
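    # Sketch: [range(1, 100, 2), range(2, 100, 2)] merges to range(1, 100),
    # while unions with uneven spacing are returned as sorted lists.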

    if _check_lists_equal(idl):
        return idl[0]

    idunion = sorted(set().union(*idl))

    # Check whether idunion can be expressed as range
    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
    idtest = [list(idrange), idunion]
    if _check_lists_equal(idtest):
        return idrange

    return idunion


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    if _check_lists_equal(idl):
        return idl[0]

    idinter = sorted(set.intersection(*[set(o) for o in idl]))

    # Check whether idinter can be expressed as range
    try:
        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
        idtest = [list(idrange), idinter]
        if _check_lists_equal(idtest):
            return idrange
    except IndexError:
        pass

    return idinter


def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
       New, empty entries are filled by 0. If idx and new_idx are of type range, the
       greatest common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    scalefactor : float
        An additional scaling factor that can be applied to scale the fluctuations,
        e.g., when Obs with differing numbers of replica are merged.
    """
    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            if scalefactor == 1:
                return deltas
            else:
                return deltas * scalefactor
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
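
    The jacobian can also be supplied manually; a sketch for the product of
    two observables, mirroring the implementation of Obs.__mul__ above:

    new_obs = derived_observable(lambda x, **kwargs: x[0] * x[1], [obs1, obs2],
                                 man_grad=[obs2.value, obs1.value])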
1197    """
1198
1199    data = np.asarray(data)
1200    raveled_data = data.ravel()
1201
1202    # Workaround for matrix operations containing non Obs data
1203    if not all(isinstance(x, Obs) for x in raveled_data):
1204        for i in range(len(raveled_data)):
1205            if isinstance(raveled_data[i], (int, float)):
1206                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1207
1208    allcov = {}
1209    for o in raveled_data:
1210        for name in o.cov_names:
1211            if name in allcov:
1212                if not np.allclose(allcov[name], o.covobs[name].cov):
1213                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1214            else:
1215                allcov[name] = o.covobs[name].cov
1216
1217    n_obs = len(raveled_data)
1218    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1219    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1220    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1221
1222    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1223
1224    if data.ndim == 1:
1225        values = np.array([o.value for o in data])
1226    else:
1227        values = np.vectorize(lambda x: x.value)(data)
1228
1229    new_values = func(values, **kwargs)
1230
1231    multi = int(isinstance(new_values, np.ndarray))
1232
1233    new_r_values = {}
1234    new_idl_d = {}
1235    for name in new_sample_names:
1236        idl = []
1237        tmp_values = np.zeros(n_obs)
1238        for i, item in enumerate(raveled_data):
1239            tmp_values[i] = item.r_values.get(name, item.value)
1240            tmp_idl = item.idl.get(name)
1241            if tmp_idl is not None:
1242                idl.append(tmp_idl)
1243        if multi > 0:
1244            tmp_values = np.array(tmp_values).reshape(data.shape)
1245        new_r_values[name] = func(tmp_values, **kwargs)
1246        new_idl_d[name] = _merge_idx(idl)
1247
1248    def _compute_scalefactor_missing_rep(obs):
1249        """
1250        Computes the scale factor that is to be multiplied with the deltas
1251        in the case where Obs with different subsets of replica are merged.
1252        Returns a dictionary with the scale factor for each Monte Carlo name.
1253
1254        Parameters
1255        ----------
1256        obs : Obs
1257            The observable corresponding to the deltas that are to be scaled
1258        """
1259        scalef_d = {}
1260        for mc_name in obs.mc_names:
1261            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1262            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1263            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1264                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1265        return scalef_d
1266
1267    if 'man_grad' in kwargs:
1268        deriv = np.asarray(kwargs.get('man_grad'))
1269        if new_values.shape + data.shape != deriv.shape:
1270            raise Exception('Manual derivative does not have correct shape.')
1271    elif kwargs.get('num_grad') is True:
1272        if multi > 0:
1273            raise Exception('Multi mode currently not supported for numerical derivative')
1274        options = {
1275            'base_step': 0.1,
1276            'step_ratio': 2.5}
1277        for key in options.keys():
1278            kwarg = kwargs.get(key)
1279            if kwarg is not None:
1280                options[key] = kwarg
1281        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1282        if tmp_df.size == 1:
1283            deriv = np.array([tmp_df.real])
1284        else:
1285            deriv = tmp_df.real
1286    else:
1287        deriv = jacobian(func)(values, **kwargs)
1288
1289    final_result = np.zeros(new_values.shape, dtype=object)
1290
1291    if array_mode is True:
1292
1293        class _Zero_grad():
1294            def __init__(self, N):
1295                self.grad = np.zeros((N, 1))
1296
1297        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1298        d_extracted = {}
1299        g_extracted = {}
1300        for name in new_sample_names:
1301            d_extracted[name] = []
1302            ens_length = len(new_idl_d[name])
1303            for i_dat, dat in enumerate(data):
1304                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1305        for name in new_cov_names:
1306            g_extracted[name] = []
1307            zero_grad = _Zero_grad(new_covobs_lengths[name])
1308            for i_dat, dat in enumerate(data):
1309                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1310
1311    for i_val, new_val in np.ndenumerate(new_values):
1312        new_deltas = {}
1313        new_grad = {}
1314        if array_mode is True:
1315            for name in new_sample_names:
1316                ens_length = d_extracted[name][0].shape[-1]
1317                new_deltas[name] = np.zeros(ens_length)
1318                for i_dat, dat in enumerate(d_extracted[name]):
1319                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1320            for name in new_cov_names:
1321                new_grad[name] = 0
1322                for i_dat, dat in enumerate(g_extracted[name]):
1323                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1324        else:
1325            for j_obs, obs in np.ndenumerate(data):
1326                scalef_d = _compute_scalefactor_missing_rep(obs)
1327                for name in obs.names:
1328                    if name in obs.cov_names:
1329                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1330                    else:
1331                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1332
1333        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1334
1335        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1336            raise Exception('The same name has been used for deltas and covobs!')
1337        new_samples = []
1338        new_means = []
1339        new_idl = []
1340        new_names_obs = []
1341        for name in new_names:
1342            if name not in new_covobs:
1343                new_samples.append(new_deltas[name])
1344                new_idl.append(new_idl_d[name])
1345                new_means.append(new_r_values[name][i_val])
1346                new_names_obs.append(name)
1347        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1348        for name in new_covobs:
1349            final_result[i_val].names.append(name)
1350        final_result[i_val]._covobs = new_covobs
1351        final_result[i_val]._value = new_val
1352        final_result[i_val].reweighted = reweighted
1353
1354    if multi == 0:
1355        final_result = final_result.item()
1356
1357    return final_result
1358
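
# Minimal usage sketch for derived_observable; the names `obs1`, `obs2` and all
# numbers are illustrative (`pe.pseudo_Obs` generates synthetic samples). Note
# that numpy calls inside `func` have to go through the autograd wrapper `anp`.
def _example_derived_observable():
    import pyerrors as pe
    obs1 = pe.pseudo_Obs(2.0, 0.1, 'ensA')
    obs2 = pe.pseudo_Obs(1.0, 0.05, 'ensA')
    # Ratio via an anonymous function; autograd supplies the jacobian.
    ratio = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    # The same data combined with an explicitly autograd-wrapped numpy function.
    log_diff = derived_observable(lambda x: anp.log(x[0]) - anp.log(x[1]), [obs1, obs2])
    ratio.gamma_method()
    log_diff.gamma_method()
    return ratio, log_diff
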
1359
1360def _reduce_deltas(deltas, idx_old, idx_new):
1361    """Extract deltas defined on idx_old on all configs of idx_new.
1362
1363    Assumes that idx_old and idx_new are correctly defined idl, i.e., that they
1364    are ordered in ascending order.
1365
1366    Parameters
1367    ----------
1368    deltas : list
1369        List of fluctuations
1370    idx_old : list
1371        List or range of configs on which the deltas are defined
1372    idx_new : list
1373        List of configs for which we want to extract the deltas.
1374        Has to be a subset of idx_old.
1375    """
1376    if not len(deltas) == len(idx_old):
1377        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1378    if type(idx_old) is range and type(idx_new) is range:
1379        if idx_old == idx_new:
1380            return deltas
1381    if _check_lists_equal([idx_old, idx_new]):
1382        return deltas
1383    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1384    if len(indices) < len(idx_new):
1385        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1386    return np.array(deltas)[indices]
1387
1388
1389def reweight(weight, obs, **kwargs):
1390    """Reweight a list of observables.
1391
1392    Parameters
1393    ----------
1394    weight : Obs
1395        Reweighting factor. An Observable that has to be defined on a superset of the
1396        configurations in obs[i].idl for all i.
1397    obs : list
1398        list of Obs, e.g. [obs1, obs2, obs3].
1399    all_configs : bool
1400        if True, the reweighted observables are normalized by the average of
1401        the reweighting factor on all configurations in weight.idl and not
1402        on the configurations in obs[i].idl. Default False.
1403    """
1404    result = []
1405    for i in range(len(obs)):
1406        if len(obs[i].cov_names):
1407            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1408        if not set(obs[i].names).issubset(weight.names):
1409            raise Exception('Error: Ensembles do not fit')
1410        for name in obs[i].names:
1411            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1412                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1413        new_samples = []
1414        w_deltas = {}
1415        for name in sorted(obs[i].names):
1416            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1417            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1418        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1419
1420        if kwargs.get('all_configs'):
1421            new_weight = weight
1422        else:
1423            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1424
1425        result.append(tmp_obs / new_weight)
1426        result[-1].reweighted = True
1427
1428    return result
1429
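
# Hedged sketch of reweight: `w` stands in for a reweighting factor and is
# assumed to be defined on the same configurations as the observable; the
# ensemble name and the numbers are illustrative only.
def _example_reweight():
    import pyerrors as pe
    my_obs = pe.pseudo_Obs(0.8, 0.03, 'ensA')
    w = pe.pseudo_Obs(1.0, 0.01, 'ensA')  # plays the role of the reweighting factor
    reweighted_obs = reweight(w, [my_obs])[0]
    reweighted_obs.gamma_method()
    return reweighted_obs
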
1430
1431def correlate(obs_a, obs_b):
1432    """Correlate two observables.
1433
1434    Parameters
1435    ----------
1436    obs_a : Obs
1437        First observable
1438    obs_b : Obs
1439        Second observable
1440
1441    Notes
1442    -----
1443    Keep in mind that only primary observables which have not yet been reweighted
1444    should be correlated; the reweighting has to be applied after correlating the
1445    observables. Currently this only works if the ensembles are identical (not strictly necessary).
1446    """
1447
1448    if sorted(obs_a.names) != sorted(obs_b.names):
1449        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1450    if len(obs_a.cov_names) or len(obs_b.cov_names):
1451        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1452    for name in obs_a.names:
1453        if obs_a.shape[name] != obs_b.shape[name]:
1454            raise Exception(f"Shapes of ensemble {name} do not fit")
1455        if obs_a.idl[name] != obs_b.idl[name]:
1456            raise Exception(f"idl of ensemble {name} do not fit")
1457
1458    if obs_a.reweighted is True:
1459        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1460    if obs_b.reweighted is True:
1461        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1462
1463    new_samples = []
1464    new_idl = []
1465    for name in sorted(obs_a.names):
1466        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1467        new_idl.append(obs_a.idl[name])
1468
1469    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1470    o.reweighted = obs_a.reweighted or obs_b.reweighted
1471    return o
1472
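
# Illustrative sketch of correlate: two primary (not yet reweighted) Obs on a
# common, identical ensemble are combined into the per-configuration product.
def _example_correlate():
    import pyerrors as pe
    obs_a = pe.pseudo_Obs(1.2, 0.05, 'ensA')
    obs_b = pe.pseudo_Obs(0.7, 0.02, 'ensA')
    ab = correlate(obs_a, obs_b)
    ab.gamma_method()
    return ab
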
1473
1474def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1475    r'''Calculates the error covariance matrix of a set of observables.
1476
1477    WARNING: This function should be used with care, especially for observables with support on multiple
1478             ensembles with differing autocorrelations. See the notes below for details.
1479
1480    The gamma method has to be applied first to all observables.
1481
1482    Parameters
1483    ----------
1484    obs : list or numpy.ndarray
1485        List or one dimensional array of Obs
1486    visualize : bool
1487        If True plots the corresponding normalized correlation matrix (default False).
1488    correlation : bool
1489        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1490    smooth : None or int
1491        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1492        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1493        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1494        small ones.
1495
1496    Notes
1497    -----
1498    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1499    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1500    in the absence of autocorrelation.
1501    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1502    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1503    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1504    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1505    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1506    '''
1507
1508    length = len(obs)
1509
1510    max_samples = np.max([o.N for o in obs])
1511    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1512        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1513
1514    cov = np.zeros((length, length))
1515    for i in range(length):
1516        for j in range(i, length):
1517            cov[i, j] = _covariance_element(obs[i], obs[j])
1518    cov = cov + cov.T - np.diag(np.diag(cov))
1519
1520    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1521
1522    if isinstance(smooth, int):
1523        corr = _smooth_eigenvalues(corr, smooth)
1524
1525    if visualize:
1526        plt.matshow(corr, vmin=-1, vmax=1)
1527        plt.set_cmap('RdBu')
1528        plt.colorbar()
1529        plt.draw()
1530
1531    if correlation is True:
1532        return corr
1533
1534    errors = [o.dvalue for o in obs]
1535    cov = np.diag(errors) @ corr @ np.diag(errors)
1536
1537    eigenvalues = np.linalg.eigh(cov)[0]
1538    if not np.all(eigenvalues >= 0):
1539        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1540
1541    return cov
1542
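
# Minimal sketch of the intended call sequence for covariance: run the gamma
# method on every Obs first, then estimate the matrix. `obs2` is correlated
# with `obs1` by construction; all inputs are synthetic and illustrative.
def _example_covariance():
    import pyerrors as pe
    obs1 = pe.pseudo_Obs(1.0, 0.1, 'ensA')
    obs2 = obs1 + pe.pseudo_Obs(0.0, 0.05, 'ensA')
    obs1.gamma_method()
    obs2.gamma_method()
    cov = covariance([obs1, obs2])                     # error covariance matrix
    corr = covariance([obs1, obs2], correlation=True)  # normalized correlation matrix
    return cov, corr
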
1543
1544def invert_corr_cov_cholesky(corr, inverrdiag):
1545    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1546       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1547
1548    Parameters
1549    ----------
1550    corr : np.ndarray
1551           correlation matrix
1552    inverrdiag : np.ndarray
1553              diagonal matrix, the entries are the inverse errors of the data points considered
1554    """
1555
1556    condn = np.linalg.cond(corr)
1557    if condn > 0.1 / np.finfo(float).eps:
1558        raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1559    if condn > 1e13:
1560        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
1561    chol = np.linalg.cholesky(corr)
1562    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1563
1564    return chol_inv
1565
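
# Sketch with illustrative numbers: for errors err_i the returned factor
# satisfies chol_inv.T @ chol_inv == inv(diag(err) @ corr @ diag(err)),
# i.e. the inverse of the covariance matrix built from corr and the errors.
def _example_invert_corr_cov_cholesky():
    corr = np.array([[1.0, 0.3], [0.3, 1.0]])
    err = np.array([0.1, 0.2])
    inverrdiag = np.diag(1 / err)
    chol_inv = invert_corr_cov_cholesky(corr, inverrdiag)
    inv_cov = chol_inv.T @ chol_inv  # equals np.linalg.inv(np.diag(err) @ corr @ np.diag(err))
    return inv_cov
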
1566
1567def sort_corr(corr, kl, yd):
1568    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1569
1570    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1571    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1572    that the correlation matrix is based on.
1573    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1574    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1575    to the y data `yd` when arranged in an alphabetical order by its keys.
1576
1577    Parameters
1578    ----------
1579    corr : np.ndarray
1580        A square correlation matrix constructed using the order of the y data specified by `kl`.
1581        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1582    kl : list of str
1583        A list of keys that denotes the order in which the y data from `yd` was used to build the
1584        input correlation matrix `corr`.
1585    yd : dict of list
1586        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1587        y data points. The total number of y data points across all keys must match the dimensions
1588        of `corr`. The lists in the dictionary can be lists of Obs.
1589
1590    Returns
1591    -------
1592    np.ndarray
1593        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1594
1595    Example
1596    -------
1597    >>> import numpy as np
1598    >>> import pyerrors as pe
1599    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1600    >>> kl = ['b', 'a']
1601    >>> yd = {'a': [1, 2], 'b': [3]}
1602    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1603    >>> sorted_corr
1604    array([[1. , 0.4, 0.2],
1605           [0.4, 1. , 0.3],
1606           [0.2, 0.3, 1. ]])
1607
1608    """
1609    kl_sorted = sorted(kl)
1610
1611    posd = {}
1612    ofs = 0
1613    for ki, k in enumerate(kl):
1614        posd[k] = [i + ofs for i in range(len(yd[k]))]
1615        ofs += len(posd[k])
1616
1617    mapping = []
1618    for k in kl_sorted:
1619        for i in range(len(yd[k])):
1620            mapping.append(posd[k][i])
1621
1622    corr_sorted = np.zeros_like(corr)
1623    for i in range(corr.shape[0]):
1624        for j in range(corr.shape[0]):
1625            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1626
1627    return corr_sorted
1628
1629
1630def _smooth_eigenvalues(corr, E):
1631    """Eigenvalue smoothing as described in hep-lat/9412087
1632
1633    corr : np.ndarray
1634        correlation matrix
1635    E : integer
1636        Number of eigenvalues to be left substantially unchanged
1637    """
1638    if not (2 < E < corr.shape[0] - 1):
1639        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1640    vals, vec = np.linalg.eigh(corr)
1641    lambda_min = np.mean(vals[:-E])
1642    vals[vals < lambda_min] = lambda_min
1643    vals /= np.mean(vals)
1644    return vec @ np.diag(vals) @ vec.T
1645
1646
1647def _covariance_element(obs1, obs2):
1648    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1649
1650    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1651        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1652        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1653        return np.sum(deltas1 * deltas2)
1654
1655    if set(obs1.names).isdisjoint(set(obs2.names)):
1656        return 0.0
1657
1658    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1659        raise Exception('The gamma method has to be applied to both Obs first.')
1660
1661    dvalue = 0.0
1662
1663    for e_name in obs1.mc_names:
1664
1665        if e_name not in obs2.mc_names:
1666            continue
1667
1668        idl_d = {}
1669        for r_name in obs1.e_content[e_name]:
1670            if r_name not in obs2.e_content[e_name]:
1671                continue
1672            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1673
1674        gamma = 0.0
1675
1676        for r_name in obs1.e_content[e_name]:
1677            if r_name not in obs2.e_content[e_name]:
1678                continue
1679            if len(idl_d[r_name]) == 0:
1680                continue
1681            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1682
1683        if gamma == 0.0:
1684            continue
1685
1686        gamma_div = 0.0
1687        for r_name in obs1.e_content[e_name]:
1688            if r_name not in obs2.e_content[e_name]:
1689                continue
1690            if len(idl_d[r_name]) == 0:
1691                continue
1692            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1693        gamma /= gamma_div
1694
1695        dvalue += gamma
1696
1697    for e_name in obs1.cov_names:
1698
1699        if e_name not in obs2.cov_names:
1700            continue
1701
1702        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1703
1704    return dvalue
1705
1706
1707def import_jackknife(jacks, name, idl=None):
1708    """Imports jackknife samples and returns an Obs
1709
1710    Parameters
1711    ----------
1712    jacks : numpy.ndarray
1713        numpy array containing the mean value as zeroth entry and
1714        the N jackknife samples as first to Nth entry.
1715    name : str
1716        name of the ensemble the samples are defined on.
1717    """
1718    length = len(jacks) - 1
1719    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1720    samples = jacks[1:] @ prj
1721    mean = np.mean(samples)
1722    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1723    new_obs._value = jacks[0]
1724    return new_obs
1725
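
# Round-trip sketch: export jackknife samples from a synthetic Obs and
# reconstruct an equivalent Obs from them; names and numbers are illustrative.
def _example_import_jackknife():
    import pyerrors as pe
    my_obs = pe.pseudo_Obs(0.5, 0.02, 'ensA', samples=100)
    jacks = my_obs.export_jackknife()  # mean value plus 100 jackknife samples
    reconstructed = import_jackknife(jacks, 'ensA')
    reconstructed.gamma_method()
    return reconstructed
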
1726
1727def import_bootstrap(boots, name, random_numbers):
1728    """Imports bootstrap samples and returns an Obs
1729
1730    Parameters
1731    ----------
1732    boots : numpy.ndarray
1733        numpy array containing the mean value as zeroth entry and
1734        the N bootstrap samples as first to Nth entry.
1735    name : str
1736        name of the ensemble the samples are defined on.
1737    random_numbers : np.ndarray
1738        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1739        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1740        chain to be reconstructed.
1741    """
1742    samples, length = random_numbers.shape
1743    if samples != len(boots) - 1:
1744        raise ValueError("Random numbers do not have the correct shape.")
1745
1746    if samples < length:
1747        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1748
1749    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1750
1751    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1752    ret = Obs([samples], [name])
1753    ret._value = boots[0]
1754    return ret
1755
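
# Round-trip sketch for import_bootstrap: the random numbers used to create
# the bootstrap samples have to be kept and passed along. The chain length of
# 100 and the 600 bootstrap samples (>= chain length) are illustrative choices.
def _example_import_bootstrap():
    import pyerrors as pe
    rng = np.random.default_rng(42)
    my_obs = pe.pseudo_Obs(0.5, 0.02, 'ensA', samples=100)
    random_numbers = rng.integers(0, 100, size=(600, 100))
    boots = my_obs.export_bootstrap(600, random_numbers=random_numbers)
    reconstructed = import_bootstrap(boots, 'ensA', random_numbers)
    return reconstructed
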
1756
1757def merge_obs(list_of_obs):
1758    """Combine all observables in list_of_obs into one new observable
1759
1760    Parameters
1761    ----------
1762    list_of_obs : list
1763        list of the Obs objects to be combined
1764
1765    Notes
1766    -----
1767    It is not possible to combine Obs which are based on the same replicum.
1768    """
1769    replist = [item for obs in list_of_obs for item in obs.names]
1770    if len(replist) != len(set(replist)):
1771        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1772    if any([len(o.cov_names) for o in list_of_obs]):
1773        raise Exception('Not possible to merge data that contains covobs!')
1774    new_dict = {}
1775    idl_dict = {}
1776    for o in list_of_obs:
1777        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1778                        for key in set(o.deltas) | set(o.r_values)})
1779        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1780
1781    names = sorted(new_dict.keys())
1782    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1783    o.reweighted = any(oi.reweighted for oi in list_of_obs)
1784    return o
1785
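
# Sketch of merge_obs with two synthetic replica of one ensemble, using the
# 'ensemble|replicum' naming convention; all labels here are illustrative.
def _example_merge_obs():
    import pyerrors as pe
    rep1 = pe.pseudo_Obs(1.0, 0.1, 'ensA|r01')
    rep2 = pe.pseudo_Obs(1.0, 0.1, 'ensA|r02')
    combined = merge_obs([rep1, rep2])
    combined.gamma_method()
    return combined
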
1786
1787def cov_Obs(means, cov, name, grad=None):
1788    """Create an Obs based on mean(s) and a covariance matrix
1789
1790    Parameters
1791    ----------
1792    means : list of floats or float
1793        N mean value(s) of the new Obs
1794    cov : list or array
1795        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1796    name : str
1797        identifier for the covariance matrix
1798    grad : list or array
1799        Gradient of the Covobs wrt. the means belonging to cov.
1800    """
1801
1802    def covobs_to_obs(co):
1803        """Make an Obs out of a Covobs
1804
1805        Parameters
1806        ----------
1807        co : Covobs
1808            Covobs to be embedded into the Obs
1809        """
1810        o = Obs([], [], means=[])
1811        o._value = co.value
1812        o.names.append(co.name)
1813        o._covobs[co.name] = co
1814        o._dvalue = np.sqrt(co.errsq())
1815        return o
1816
1817    ol = []
1818    if isinstance(means, (float, int)):
1819        means = [means]
1820
1821    for i in range(len(means)):
1822        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1823    if ol[0].covobs[name].N != len(means):
1824        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1825    if len(ol) == 1:
1826        return ol[0]
1827    return ol
1828
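
# Sketch of cov_Obs for a pair of correlated systematic inputs; the means and
# the 2x2 covariance matrix are illustrative numbers.
def _example_cov_Obs():
    cov = np.array([[0.04, 0.01], [0.01, 0.09]])
    o1, o2 = cov_Obs([1.0, 2.0], cov, 'systematics')
    o1.gamma_method()
    o2.gamma_method()
    return o1, o2
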
1829
1830def _determine_gap(o, e_content, e_name):
1831    gaps = []
1832    for r_name in e_content[e_name]:
1833        if isinstance(o.idl[r_name], range):
1834            gaps.append(o.idl[r_name].step)
1835        else:
1836            gaps.append(np.min(np.diff(o.idl[r_name])))
1837
1838    gap = min(gaps)
1839    if not np.all([gi % gap == 0 for gi in gaps]):
1840        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1841
1842    return gap
1843
1844
1845def _check_lists_equal(idl):
1846    '''
1847    Use groupby to efficiently check whether all elements of idl are identical.
1848    Returns True if all elements are equal, otherwise False.
1849
1850    Parameters
1851    ----------
1852    idl : list of lists, ranges or np.ndarrays
1853    '''
1854    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1855    if next(g, True) and not next(g, False):
1856        return True
1857    return False
class Obs:
 19class Obs:
 20    """Class for a general observable.
 21
 22    Instances of Obs are the basic objects of a pyerrors error analysis.
 23    They are initialized with a list which contains arrays of samples for
 24    different ensembles/replica and another list of same length which contains
 25    the names of the ensembles/replica. Mathematical operations can be
 26    performed on instances. The result is another instance of Obs. The error of
 27    an instance can be computed with the gamma_method. Also contains additional
 28    methods for output and visualization of the error calculation.
 29
 30    Attributes
 31    ----------
 32    S_global : float
 33        Standard value for S (default 2.0)
 34    S_dict : dict
 35        Dictionary for S values. If an entry for a given ensemble
 36        exists this overwrites the standard value for that ensemble.
 37    tau_exp_global : float
 38        Standard value for tau_exp (default 0.0)
 39    tau_exp_dict : dict
 40        Dictionary for tau_exp values. If an entry for a given ensemble exists
 41        this overwrites the standard value for that ensemble.
 42    N_sigma_global : float
 43        Standard value for N_sigma (default 1.0)
 44    N_sigma_dict : dict
 45        Dictionary for N_sigma values. If an entry for a given ensemble exists
 46        this overwrites the standard value for that ensemble.
 47    """
 48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 52                 'idl', 'tag', '_covobs', '__dict__']
 53
 54    S_global = 2.0
 55    S_dict = {}
 56    tau_exp_global = 0.0
 57    tau_exp_dict = {}
 58    N_sigma_global = 1.0
 59    N_sigma_dict = {}
 60
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86            else:
 87                if not isinstance(names[0], str):
 88                    raise TypeError('All names have to be strings.')
 89            if min(len(x) for x in samples) <= 4:
 90                raise ValueError('Samples have to have at least 5 entries.')
 91
 92        self.names = sorted(names)
 93        self.shape = {}
 94        self.r_values = {}
 95        self.deltas = {}
 96        self._covobs = {}
 97
 98        self._value = 0
 99        self.N = 0
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
109                    elif np.any(dc == 0):
110                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
111                    if len(dc) == 1:
112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
113                    else:
114                        self.idl[name] = list(idx)
115                else:
116                    raise TypeError('incompatible type for idl[%s].' % (name))
117        else:
118            for name, sample in sorted(zip(names, samples)):
119                self.idl[name] = range(1, len(sample) + 1)
120
121        if kwargs.get("means") is not None:
122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                self.r_values[name] = mean
126                self.deltas[name] = sample
127        else:
128            for name, sample in sorted(zip(names, samples)):
129                self.shape[name] = len(self.idl[name])
130                self.N += self.shape[name]
131                if len(sample) != self.shape[name]:
132                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
133                self.r_values[name] = np.mean(sample)
134                self.deltas[name] = sample - self.r_values[name]
135                self._value += self.shape[name] * self.r_values[name]
136            self._value /= self.N
137
138        self._dvalue = 0.0
139        self.ddvalue = 0.0
140        self.reweighted = False
141
142        self.tag = None
143
144    @property
145    def value(self):
146        return self._value
147
148    @property
149    def dvalue(self):
150        return self._dvalue
151
152    @property
153    def e_names(self):
154        return sorted(set([o.split('|')[0] for o in self.names]))
155
156    @property
157    def cov_names(self):
158        return sorted(set([o for o in self.covobs.keys()]))
159
160    @property
161    def mc_names(self):
162        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
163
164    @property
165    def e_content(self):
166        res = {}
167        for e, e_name in enumerate(self.e_names):
168            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
169            if e_name in self.names:
170                res[e_name].append(e_name)
171        return res
172
173    @property
174    def covobs(self):
175        return self._covobs
176
177    def gamma_method(self, **kwargs):
178        """Estimate the error and related properties of the Obs.
179
180        Parameters
181        ----------
182        S : float
183            specifies a custom value for the parameter S (default 2.0).
184            If set to 0 it is assumed that the data exhibits no
185            autocorrelation. In this case the error estimates coincides
186            with the sample standard error.
187        tau_exp : float
188            positive value triggers the critical slowing down analysis
189            (default 0.0).
190        N_sigma : float
191            number of standard deviations from zero until the tail is
192            attached to the autocorrelation function (default 1).
193        fft : bool
194            determines whether the fft algorithm is used for the computation
195            of the autocorrelation function (default True)
196        """
197
198        e_content = self.e_content
199        self.e_dvalue = {}
200        self.e_ddvalue = {}
201        self.e_tauint = {}
202        self.e_dtauint = {}
203        self.e_windowsize = {}
204        self.e_n_tauint = {}
205        self.e_n_dtauint = {}
206        e_gamma = {}
207        self.e_rho = {}
208        self.e_drho = {}
209        self._dvalue = 0
210        self.ddvalue = 0
211
212        self.S = {}
213        self.tau_exp = {}
214        self.N_sigma = {}
215
216        if kwargs.get('fft') is False:
217            fft = False
218        else:
219            fft = True
220
221        def _parse_kwarg(kwarg_name):
222            if kwarg_name in kwargs:
223                tmp = kwargs.get(kwarg_name)
224                if isinstance(tmp, (int, float)):
225                    if tmp < 0:
226                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
227                    for e, e_name in enumerate(self.e_names):
228                        getattr(self, kwarg_name)[e_name] = tmp
229                else:
230                    raise TypeError(kwarg_name + ' is not in proper format.')
231            else:
232                for e, e_name in enumerate(self.e_names):
233                    if e_name in getattr(Obs, kwarg_name + '_dict'):
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
235                    else:
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
237
238        _parse_kwarg('S')
239        _parse_kwarg('tau_exp')
240        _parse_kwarg('N_sigma')
241
242        for e, e_name in enumerate(self.mc_names):
243            gapsize = _determine_gap(self, e_content, e_name)
244
245            r_length = []
246            for r_name in e_content[e_name]:
247                if isinstance(self.idl[r_name], range):
248                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
249                else:
250                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
251
252            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
253            w_max = max(r_length) // 2
254            e_gamma[e_name] = np.zeros(w_max)
255            self.e_rho[e_name] = np.zeros(w_max)
256            self.e_drho[e_name] = np.zeros(w_max)
257
258            for r_name in e_content[e_name]:
259                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
260
261            gamma_div = np.zeros(w_max)
262            for r_name in e_content[e_name]:
263                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
264            gamma_div[gamma_div < 1] = 1.0
265            e_gamma[e_name] /= gamma_div[:w_max]
266
267            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
268                self.e_tauint[e_name] = 0.5
269                self.e_dtauint[e_name] = 0.0
270                self.e_dvalue[e_name] = 0.0
271                self.e_ddvalue[e_name] = 0.0
272                self.e_windowsize[e_name] = 0
273                continue
274
275            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
276            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
277            # Make sure no entry of tauint is smaller than 0.5
278            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
279            # hep-lat/0306017 eq. (42)
280            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
281            self.e_n_dtauint[e_name][0] = 0.0
282
283            def _compute_drho(i):
284                tmp = (self.e_rho[e_name][i + 1:w_max]
285                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
286                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
287                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
288                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
289
290            if self.tau_exp[e_name] > 0:
291                _compute_drho(1)
292                texp = self.tau_exp[e_name]
293                # Critical slowing down analysis
294                if w_max // 2 <= 1:
295                    raise Exception("Need at least 8 samples for tau_exp error analysis")
296                for n in range(1, w_max // 2):
297                    _compute_drho(n + 1)
298                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
299                        # Bias correction hep-lat/0306017 eq. (49) included
300                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
301                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
302                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
303                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
304                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
305                        self.e_windowsize[e_name] = n
306                        break
307            else:
308                if self.S[e_name] == 0.0:
309                    self.e_tauint[e_name] = 0.5
310                    self.e_dtauint[e_name] = 0.0
311                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
312                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
313                    self.e_windowsize[e_name] = 0
314                else:
315                    # Standard automatic windowing procedure
316                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
317                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
318                    for n in range(1, w_max):
319                        if g_w[n - 1] < 0 or n >= w_max - 1:
320                            _compute_drho(n)
321                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
322                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
323                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
324                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
325                            self.e_windowsize[e_name] = n
326                            break
327
328            self._dvalue += self.e_dvalue[e_name] ** 2
329            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
330
331        for e_name in self.cov_names:
332            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
333            self.e_ddvalue[e_name] = 0
334            self._dvalue += self.e_dvalue[e_name]**2
335
336        self._dvalue = np.sqrt(self._dvalue)
337        if self._dvalue == 0.0:
338            self.ddvalue = 0.0
339        else:
340            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
341        return
342
343    gm = gamma_method
344
345    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
346        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
347           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
348
349        Parameters
350        ----------
351        deltas : list
352            List of fluctuations
353        idx : list
354            List or range of configurations on which the deltas are defined.
355        shape : int
356            Number of configurations in idx.
357        w_max : int
358            Upper bound for the summation window.
359        fft : bool
360            determines whether the fft algorithm is used for the computation
361            of the autocorrelation function.
362        gapsize : int
363            The target distance between two configurations. If longer distances
364            are found in idx, the data is expanded.
365        """
366        gamma = np.zeros(w_max)
367        deltas = _expand_deltas(deltas, idx, shape, gapsize)
368        new_shape = len(deltas)
369        if fft:
370            max_gamma = min(new_shape, w_max)
371            # The padding for the fft has to be even
372            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
373            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
374        else:
375            for n in range(w_max):
376                if new_shape - n >= 0:
377                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
378
379        return gamma
380
381    def details(self, ens_content=True):
382        """Output detailed properties of the Obs.
383
384        Parameters
385        ----------
386        ens_content : bool
387            print details about the ensembles and replica if true.
388        """
389        if self.tag is not None:
390            print("Description:", self.tag)
391        if not hasattr(self, 'e_dvalue'):
392            print('Result\t %3.8e' % (self.value))
393        else:
394            if self.value == 0.0:
395                percentage = np.nan
396            else:
397                percentage = np.abs(self._dvalue / self.value) * 100
398            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
399            if len(self.e_names) > 1:
400                print(' Ensemble errors:')
401            e_content = self.e_content
402            for e_name in self.mc_names:
403                gap = _determine_gap(self, e_content, e_name)
404
405                if len(self.e_names) > 1:
406                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
407                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
408                tau_string += f" in units of {gap} config"
409                if gap > 1:
410                    tau_string += "s"
411                if self.tau_exp[e_name] > 0:
412                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
413                else:
414                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
415                print(tau_string)
416            for e_name in self.cov_names:
417                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
418        if ens_content is True:
419            if len(self.e_names) == 1:
420                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
421            else:
422                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
423            my_string_list = []
424            for key, value in sorted(self.e_content.items()):
425                if key not in self.covobs:
426                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
427                    if len(value) == 1:
428                        my_string += f': {self.shape[value[0]]} configurations'
429                        if isinstance(self.idl[value[0]], range):
430                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
431                        else:
432                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
433                    else:
434                        sublist = []
435                        for v in value:
436                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
437                            my_substring += f': {self.shape[v]} configurations'
438                            if isinstance(self.idl[v], range):
439                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
440                            else:
441                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
442                            sublist.append(my_substring)
443
444                        my_string += '\n' + '\n'.join(sublist)
445                else:
446                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
447                my_string_list.append(my_string)
448            print('\n'.join(my_string_list))
449
450    def reweight(self, weight):
451        """Reweight the obs with given rewighting factors.
452
453        Parameters
454        ----------
455        weight : Obs
456            Reweighting factor. An Observable that has to be defined on a superset of the
457            configurations in obs[i].idl for all i.
458        all_configs : bool
459            if True, the reweighted observables are normalized by the average of
460            the reweighting factor on all configurations in weight.idl and not
461            on the configurations in obs[i].idl. Default False.
462        """
463        return reweight(weight, [self])[0]
464
465    def is_zero_within_error(self, sigma=1):
466        """Checks whether the observable is zero within 'sigma' standard errors.
467
468        Parameters
469        ----------
470        sigma : int
471            Number of standard errors used for the check.
472
473        Works only properly when the gamma method was run.
474        """
475        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
476
477    def is_zero(self, atol=1e-10):
478        """Checks whether the observable is zero within a given tolerance.
479
480        Parameters
481        ----------
482        atol : float
483            Absolute tolerance (for details see numpy documentation).
484        """
485        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
486
487    def plot_tauint(self, save=None):
488        """Plot integrated autocorrelation time for each ensemble.
489
490        Parameters
491        ----------
492        save : str
493            saves the figure to a file named 'save' if.
494        """
495        if not hasattr(self, 'e_dvalue'):
496            raise Exception('Run the gamma method first.')
497
498        for e, e_name in enumerate(self.mc_names):
499            fig = plt.figure()
500            plt.xlabel(r'$W$')
501            plt.ylabel(r'$\tau_\mathrm{int}$')
502            length = int(len(self.e_n_tauint[e_name]))
503            if self.tau_exp[e_name] > 0:
504                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
505                x_help = np.arange(2 * self.tau_exp[e_name])
506                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
507                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
508                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
509                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
510                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
511                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
512                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
513            else:
514                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
515                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
516
517            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
518            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
519            plt.legend()
520            plt.xlim(-0.5, xmax)
521            ylim = plt.ylim()
522            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
523            plt.draw()
524            if save:
525                fig.savefig(save + "_" + str(e))
526
527    def plot_rho(self, save=None):
528        """Plot normalized autocorrelation function time for each ensemble.
529
530        Parameters
531        ----------
532        save : str
533            saves the figure to a file named 'save' if.
534        """
535        if not hasattr(self, 'e_dvalue'):
536            raise Exception('Run the gamma method first.')
537        for e, e_name in enumerate(self.mc_names):
538            fig = plt.figure()
539            plt.xlabel('W')
540            plt.ylabel('rho')
541            length = int(len(self.e_drho[e_name]))
542            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
543            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
544            if self.tau_exp[e_name] > 0:
545                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
546                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
547                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
548                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
549            else:
550                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
551                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
552            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
553            plt.xlim(-0.5, xmax)
554            plt.draw()
555            if save:
556                fig.savefig(save + "_" + str(e))
557
558    def plot_rep_dist(self):
559        """Plot replica distribution for each ensemble with more than one replicum."""
560        if not hasattr(self, 'e_dvalue'):
561            raise Exception('Run the gamma method first.')
562        for e, e_name in enumerate(self.mc_names):
563            if len(self.e_content[e_name]) == 1:
564                print('No replica distribution for a single replicum (', e_name, ')')
565                continue
566            r_length = []
567            sub_r_mean = 0
568            for r, r_name in enumerate(self.e_content[e_name]):
569                r_length.append(len(self.deltas[r_name]))
570                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
571            e_N = np.sum(r_length)
572            sub_r_mean /= e_N
573            arr = np.zeros(len(self.e_content[e_name]))
574            for r, r_name in enumerate(self.e_content[e_name]):
575                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
576            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
577            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
578            plt.draw()
579
580    def plot_history(self, expand=True):
581        """Plot derived Monte Carlo history for each ensemble
582
583        Parameters
584        ----------
585        expand : bool
586            show expanded history for irregular Monte Carlo chains (default: True).
587        """
588        for e, e_name in enumerate(self.mc_names):
589            plt.figure()
590            r_length = []
591            tmp = []
592            tmp_expanded = []
593            for r, r_name in enumerate(self.e_content[e_name]):
594                tmp.append(self.deltas[r_name] + self.r_values[r_name])
595                if expand:
596                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
597                    r_length.append(len(tmp_expanded[-1]))
598                else:
599                    r_length.append(len(tmp[-1]))
600            e_N = np.sum(r_length)
601            x = np.arange(e_N)
602            y_test = np.concatenate(tmp, axis=0)
603            if expand:
604                y = np.concatenate(tmp_expanded, axis=0)
605            else:
606                y = y_test
607            plt.errorbar(x, y, fmt='.', markersize=3)
608            plt.xlim(-0.5, e_N - 0.5)
609            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
610            plt.draw()
611
612    def plot_piechart(self, save=None):
613        """Plot piechart which shows the fractional contribution of each
614        ensemble to the error and returns a dictionary containing the fractions.
615
616        Parameters
617        ----------
618        save : str
619            saves the figure to a file named 'save' if specified.
620        """
621        if not hasattr(self, 'e_dvalue'):
622            raise Exception('Run the gamma method first.')
623        if np.isclose(0.0, self._dvalue, atol=1e-15):
624            raise Exception('Error is 0.0')
625        labels = self.e_names
626        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
627        fig1, ax1 = plt.subplots()
628        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
629        ax1.axis('equal')
630        plt.draw()
631        if save:
632            fig1.savefig(save)
633
634        return dict(zip(labels, sizes))
635
636    def dump(self, filename, datatype="json.gz", description="", **kwargs):
637        """Dump the Obs to a file 'filename' of chosen format.
638
639        Parameters
640        ----------
641        filename : str
642            name of the file to be saved.
643        datatype : str
644            Format of the exported file. Supported formats include
645            "json.gz" and "pickle"
646        description : str
647            Description for output file, only relevant for json.gz format.
648        path : str
649            specifies a custom path for the file (default '.')
650        """
651        if 'path' in kwargs:
652            file_name = kwargs.get('path') + '/' + filename
653        else:
654            file_name = filename
655
656        if datatype == "json.gz":
657            from .input.json import dump_to_json
658            dump_to_json([self], file_name, description=description)
659        elif datatype == "pickle":
660            with open(file_name + '.p', 'wb') as fb:
661                pickle.dump(self, fb)
662        else:
663            raise Exception("Unknown datatype " + str(datatype))
664
665    def export_jackknife(self):
666        """Export jackknife samples from the Obs
667
668        Returns
669        -------
670        numpy.ndarray
671            Returns a numpy array of length N + 1 where N is the number of samples
672            for the given ensemble and replicum. The zeroth entry of the array contains
673            the mean value of the Obs, entries 1 to N contain the N jackknife samples
674            derived from the Obs. The current implementation only works for observables
675            defined on exactly one ensemble and replicum. The derived jackknife samples
676            should agree with samples from a full jackknife analysis up to O(1/N).
677        """
678
679        if len(self.names) != 1:
680            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
681
682        name = self.names[0]
683        full_data = self.deltas[name] + self.r_values[name]
684        n = full_data.size
685        mean = self.value
686        tmp_jacks = np.zeros(n + 1)
687        tmp_jacks[0] = mean
688        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
689        return tmp_jacks
690
691    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
692        """Export bootstrap samples from the Obs
693
694        Parameters
695        ----------
696        samples : int
697            Number of bootstrap samples to generate.
698        random_numbers : np.ndarray
699            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
700            If not provided, the bootstrap samples are generated based on the md5 hash of the ensemble name.
701        save_rng : str
702            Save the random numbers to a file if a path is specified.
703
704        Returns
705        -------
706        numpy.ndarray
707            Returns a numpy array of length samples + 1 where samples is the
708            number of bootstrap samples. The zeroth entry of the array contains
709            the mean value of the Obs, entries 1 to samples contain the bootstrap
710            samples derived from the Obs. The current implementation only works
711            for observables defined on exactly one ensemble and replicum. The derived
712            bootstrap samples should agree with a full bootstrap analysis up to O(1/N).
713        """
714        if len(self.names) != 1:
715            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")
716
717        name = self.names[0]
718        length = self.N
719
720        if random_numbers is None:
721            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
722            rng = np.random.default_rng(seed)
723            random_numbers = rng.integers(0, length, size=(samples, length))
724
725        if save_rng is not None:
726            np.savetxt(save_rng, random_numbers, fmt='%i')
727
728        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
729        ret = np.zeros(samples + 1)
730        ret[0] = self.value
731        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
732        return ret
733
734    def __float__(self):
735        return float(self.value)
736
737    def __repr__(self):
738        return 'Obs[' + str(self) + ']'
739
740    def __str__(self):
741        return _format_uncertainty(self.value, self._dvalue)
742
743    def __format__(self, format_type):
744        if format_type == "":
745            significance = 2
746        else:
747            significance = int(float(format_type.replace("+", "").replace("-", "")))
748        my_str = _format_uncertainty(self.value, self._dvalue,
749                                     significance=significance)
750        for char in ["+", " "]:
751            if format_type.startswith(char):
752                if my_str[0] != "-":
753                    my_str = char + my_str
754        return my_str
755
756    def __hash__(self):
757        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
758        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
759        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
760        hash_tuple += tuple([o.encode() for o in self.names])
761        m = hashlib.md5()
762        [m.update(o) for o in hash_tuple]
763        return int(m.hexdigest(), 16) & 0xFFFFFFFF
764
765    # Overload comparisons
766    def __lt__(self, other):
767        return self.value < other
768
769    def __le__(self, other):
770        return self.value <= other
771
772    def __gt__(self, other):
773        return self.value > other
774
775    def __ge__(self, other):
776        return self.value >= other
777
778    def __eq__(self, other):
779        if other is None:
780            return False
781        return (self - other).is_zero()
782
783    # Overload math operations
784    def __add__(self, y):
785        if isinstance(y, Obs):
786            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
787        else:
788            if isinstance(y, np.ndarray):
789                return np.array([self + o for o in y])
790            elif isinstance(y, complex):
791                return CObs(self, 0) + y
792            elif y.__class__.__name__ in ['Corr', 'CObs']:
793                return NotImplemented
794            else:
795                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
796
797    def __radd__(self, y):
798        return self + y
799
800    def __mul__(self, y):
801        if isinstance(y, Obs):
802            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
803        else:
804            if isinstance(y, np.ndarray):
805                return np.array([self * o for o in y])
806            elif isinstance(y, complex):
807                return CObs(self * y.real, self * y.imag)
808            elif y.__class__.__name__ in ['Corr', 'CObs']:
809                return NotImplemented
810            else:
811                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
812
813    def __rmul__(self, y):
814        return self * y
815
816    def __sub__(self, y):
817        if isinstance(y, Obs):
818            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
819        else:
820            if isinstance(y, np.ndarray):
821                return np.array([self - o for o in y])
822            elif y.__class__.__name__ in ['Corr', 'CObs']:
823                return NotImplemented
824            else:
825                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
826
827    def __rsub__(self, y):
828        return -1 * (self - y)
829
830    def __pos__(self):
831        return self
832
833    def __neg__(self):
834        return -1 * self
835
836    def __truediv__(self, y):
837        if isinstance(y, Obs):
838            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
839        else:
840            if isinstance(y, np.ndarray):
841                return np.array([self / o for o in y])
842            elif y.__class__.__name__ in ['Corr', 'CObs']:
843                return NotImplemented
844            else:
845                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
846
847    def __rtruediv__(self, y):
848        if isinstance(y, Obs):
849            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
850        else:
851            if isinstance(y, np.ndarray):
852                return np.array([o / self for o in y])
853            elif y.__class__.__name__ in ['Corr', 'CObs']:
854                return NotImplemented
855            else:
856                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
857
858    def __pow__(self, y):
859        if isinstance(y, Obs):
860            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
861        else:
862            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])
863
864    def __rpow__(self, y):
865        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])
866
867    def __abs__(self):
868        return derived_observable(lambda x: anp.abs(x[0]), [self])
869
870    # Overload numpy functions
871    def sqrt(self):
872        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
873
874    def log(self):
875        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
876
877    def exp(self):
878        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
879
880    def sin(self):
881        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
882
883    def cos(self):
884        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
885
886    def tan(self):
887        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
888
889    def arcsin(self):
890        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
891
892    def arccos(self):
893        return derived_observable(lambda x: anp.arccos(x[0]), [self])
894
895    def arctan(self):
896        return derived_observable(lambda x: anp.arctan(x[0]), [self])
897
898    def sinh(self):
899        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
900
901    def cosh(self):
902        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
903
904    def tanh(self):
905        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
906
907    def arcsinh(self):
908        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
909
910    def arccosh(self):
911        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
912
913    def arctanh(self):
914        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 61    def __init__(self, samples, names, idl=None, **kwargs):
 62        """ Initialize Obs object.
 63
 64        Parameters
 65        ----------
 66        samples : list
 67            list of numpy arrays containing the Monte Carlo samples
 68        names : list
 69            list of strings labeling the individual samples
 70        idl : list, optional
 71            list of ranges or lists on which the samples are defined
 72        """
 73
 74        if kwargs.get("means") is None and len(samples):
 75            if len(samples) != len(names):
 76                raise ValueError('Length of samples and names incompatible.')
 77            if idl is not None:
 78                if len(idl) != len(names):
 79                    raise ValueError('Length of idl incompatible with samples and names.')
 80            name_length = len(names)
 81            if name_length > 1:
 82                if name_length != len(set(names)):
 83                    raise ValueError('Names are not unique.')
 84                if not all(isinstance(x, str) for x in names):
 85                    raise TypeError('All names have to be strings.')
 86            else:
 87                if not isinstance(names[0], str):
 88                    raise TypeError('All names have to be strings.')
 89            if min(len(x) for x in samples) <= 4:
 90                raise ValueError('Samples have to have at least 5 entries.')
 91
 92        self.names = sorted(names)
 93        self.shape = {}
 94        self.r_values = {}
 95        self.deltas = {}
 96        self._covobs = {}
 97
 98        self._value = 0
 99        self.N = 0
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
109                    elif np.any(dc == 0):
110                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
111                    if len(dc) == 1:
112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
113                    else:
114                        self.idl[name] = list(idx)
115                else:
116                    raise TypeError('incompatible type for idl[%s].' % (name))
117        else:
118            for name, sample in sorted(zip(names, samples)):
119                self.idl[name] = range(1, len(sample) + 1)
120
121        if kwargs.get("means") is not None:
122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                self.r_values[name] = mean
126                self.deltas[name] = sample
127        else:
128            for name, sample in sorted(zip(names, samples)):
129                self.shape[name] = len(self.idl[name])
130                self.N += self.shape[name]
131                if len(sample) != self.shape[name]:
132                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
133                self.r_values[name] = np.mean(sample)
134                self.deltas[name] = sample - self.r_values[name]
135                self._value += self.shape[name] * self.r_values[name]
136            self._value /= self.N
137
138        self._dvalue = 0.0
139        self.ddvalue = 0.0
140        self.reweighted = False
141
142        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
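A minimal construction sketch (the ensemble name 'A' and the synthetic data are made up for illustration); replica of one ensemble are labeled with the 'ensemble|replicum' naming convention:

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(7)
    samples_r0 = rng.normal(1.0, 0.1, 1000)  # replicum 0
    samples_r1 = rng.normal(1.0, 0.1, 800)   # replicum 1

    # idl defaults to range(1, len(sample) + 1) for each replicum.
    obs = pe.Obs([samples_r0, samples_r1], ['A|r0', 'A|r1'])
    print(obs.N)      # 1800, total number of samples
    print(obs.value)  # weighted mean over both replica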
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
names
shape
r_values
deltas
N
idl
ddvalue
reweighted
tag
value
144    @property
145    def value(self):
146        return self._value
dvalue
148    @property
149    def dvalue(self):
150        return self._dvalue
e_names
152    @property
153    def e_names(self):
154        return sorted(set([o.split('|')[0] for o in self.names]))
cov_names
156    @property
157    def cov_names(self):
158        return sorted(set([o for o in self.covobs.keys()]))
mc_names
160    @property
161    def mc_names(self):
162        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
e_content
164    @property
165    def e_content(self):
166        res = {}
167        for e, e_name in enumerate(self.e_names):
168            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
169            if e_name in self.names:
170                res[e_name].append(e_name)
171        return res
covobs
173    @property
174    def covobs(self):
175        return self._covobs
def gamma_method(self, **kwargs):
177    def gamma_method(self, **kwargs):
178        """Estimate the error and related properties of the Obs.
179
180        Parameters
181        ----------
182        S : float
183            specifies a custom value for the parameter S (default 2.0).
184            If set to 0 it is assumed that the data exhibits no
185            autocorrelation. In this case the error estimate coincides
186            with the sample standard error.
187        tau_exp : float
188            positive value triggers the critical slowing down analysis
189            (default 0.0).
190        N_sigma : float
191            number of standard deviations from zero until the tail is
192            attached to the autocorrelation function (default 1).
193        fft : bool
194            determines whether the fft algorithm is used for the computation
195            of the autocorrelation function (default True)
196        """
197
198        e_content = self.e_content
199        self.e_dvalue = {}
200        self.e_ddvalue = {}
201        self.e_tauint = {}
202        self.e_dtauint = {}
203        self.e_windowsize = {}
204        self.e_n_tauint = {}
205        self.e_n_dtauint = {}
206        e_gamma = {}
207        self.e_rho = {}
208        self.e_drho = {}
209        self._dvalue = 0
210        self.ddvalue = 0
211
212        self.S = {}
213        self.tau_exp = {}
214        self.N_sigma = {}
215
216        if kwargs.get('fft') is False:
217            fft = False
218        else:
219            fft = True
220
221        def _parse_kwarg(kwarg_name):
222            if kwarg_name in kwargs:
223                tmp = kwargs.get(kwarg_name)
224                if isinstance(tmp, (int, float)):
225                    if tmp < 0:
226                        raise Exception(kwarg_name + ' has to be larger than or equal to 0.')
227                    for e, e_name in enumerate(self.e_names):
228                        getattr(self, kwarg_name)[e_name] = tmp
229                else:
230                    raise TypeError(kwarg_name + ' is not in proper format.')
231            else:
232                for e, e_name in enumerate(self.e_names):
233                    if e_name in getattr(Obs, kwarg_name + '_dict'):
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
235                    else:
236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
237
238        _parse_kwarg('S')
239        _parse_kwarg('tau_exp')
240        _parse_kwarg('N_sigma')
241
242        for e, e_name in enumerate(self.mc_names):
243            gapsize = _determine_gap(self, e_content, e_name)
244
245            r_length = []
246            for r_name in e_content[e_name]:
247                if isinstance(self.idl[r_name], range):
248                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
249                else:
250                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
251
252            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
253            w_max = max(r_length) // 2
254            e_gamma[e_name] = np.zeros(w_max)
255            self.e_rho[e_name] = np.zeros(w_max)
256            self.e_drho[e_name] = np.zeros(w_max)
257
258            for r_name in e_content[e_name]:
259                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
260
261            gamma_div = np.zeros(w_max)
262            for r_name in e_content[e_name]:
263                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
264            gamma_div[gamma_div < 1] = 1.0
265            e_gamma[e_name] /= gamma_div[:w_max]
266
267            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
268                self.e_tauint[e_name] = 0.5
269                self.e_dtauint[e_name] = 0.0
270                self.e_dvalue[e_name] = 0.0
271                self.e_ddvalue[e_name] = 0.0
272                self.e_windowsize[e_name] = 0
273                continue
274
275            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
276            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
277            # Make sure no entry of tauint is smaller than 0.5
278            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
279            # hep-lat/0306017 eq. (42)
280            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
281            self.e_n_dtauint[e_name][0] = 0.0
282
283            def _compute_drho(i):
284                tmp = (self.e_rho[e_name][i + 1:w_max]
285                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
286                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
287                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
288                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
289
290            if self.tau_exp[e_name] > 0:
291                _compute_drho(1)
292                texp = self.tau_exp[e_name]
293                # Critical slowing down analysis
294                if w_max // 2 <= 1:
295                    raise Exception("Need at least 8 samples for tau_exp error analysis")
296                for n in range(1, w_max // 2):
297                    _compute_drho(n + 1)
298                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
299                        # Bias correction hep-lat/0306017 eq. (49) included
300                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
301                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
302                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
303                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
304                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
305                        self.e_windowsize[e_name] = n
306                        break
307            else:
308                if self.S[e_name] == 0.0:
309                    self.e_tauint[e_name] = 0.5
310                    self.e_dtauint[e_name] = 0.0
311                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
312                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
313                    self.e_windowsize[e_name] = 0
314                else:
315                    # Standard automatic windowing procedure
316                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
317                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
318                    for n in range(1, w_max):
319                        if g_w[n - 1] < 0 or n >= w_max - 1:
320                            _compute_drho(n)
321                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
322                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
323                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
324                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
325                            self.e_windowsize[e_name] = n
326                            break
327
328            self._dvalue += self.e_dvalue[e_name] ** 2
329            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
330
331        for e_name in self.cov_names:
332            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
333            self.e_ddvalue[e_name] = 0
334            self._dvalue += self.e_dvalue[e_name]**2
335
336        self._dvalue = np.sqrt(self._dvalue)
337        if self._dvalue == 0.0:
338            self.ddvalue = 0.0
339        else:
340            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
341        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
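A short usage sketch with synthetic autocorrelated data (an AR(1) chain on a hypothetical ensemble 'A'):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(1)
    samples = np.zeros(5000)
    for i in range(1, 5000):  # AR(1) process with sizable autocorrelation
        samples[i] = 0.7 * samples[i - 1] + rng.normal()

    obs = pe.Obs([samples], ['A'])
    obs.gamma_method()                   # automatic windowing with S=2.0
    print(obs.dvalue, obs.e_tauint['A'])

    obs.gamma_method(S=0)                # assume no autocorrelation
    obs.gamma_method(tau_exp=5)          # critical slowing down analysis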
def gm(self, **kwargs):
Alias for gamma_method. The source and the full parameter description are identical to gamma_method above.
def details(self, ens_content=True):
381    def details(self, ens_content=True):
382        """Output detailed properties of the Obs.
383
384        Parameters
385        ----------
386        ens_content : bool
387            print details about the ensembles and replica if true.
388        """
389        if self.tag is not None:
390            print("Description:", self.tag)
391        if not hasattr(self, 'e_dvalue'):
392            print('Result\t %3.8e' % (self.value))
393        else:
394            if self.value == 0.0:
395                percentage = np.nan
396            else:
397                percentage = np.abs(self._dvalue / self.value) * 100
398            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
399            if len(self.e_names) > 1:
400                print(' Ensemble errors:')
401            e_content = self.e_content
402            for e_name in self.mc_names:
403                gap = _determine_gap(self, e_content, e_name)
404
405                if len(self.e_names) > 1:
406                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
407                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
408                tau_string += f" in units of {gap} config"
409                if gap > 1:
410                    tau_string += "s"
411                if self.tau_exp[e_name] > 0:
412                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
413                else:
414                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
415                print(tau_string)
416            for e_name in self.cov_names:
417                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
418        if ens_content is True:
419            if len(self.e_names) == 1:
420                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
421            else:
422                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
423            my_string_list = []
424            for key, value in sorted(self.e_content.items()):
425                if key not in self.covobs:
426                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
427                    if len(value) == 1:
428                        my_string += f': {self.shape[value[0]]} configurations'
429                        if isinstance(self.idl[value[0]], range):
430                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
431                        else:
432                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
433                    else:
434                        sublist = []
435                        for v in value:
436                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
437                            my_substring += f': {self.shape[v]} configurations'
438                            if isinstance(self.idl[v], range):
439                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
440                            else:
441                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
442                            sublist.append(my_substring)
443
444                        my_string += '\n' + '\n'.join(sublist)
445                else:
446                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
447                my_string_list.append(my_string)
448            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
450    def reweight(self, weight):
451        """Reweight the obs with a given reweighting factor.
452
453        Parameters
454        ----------
455        weight : Obs
456            Reweighting factor. An Observable that has to be defined on a superset of the
457            configurations in obs[i].idl for all i.
458        all_configs : bool
459            if True, the reweighted observables are normalized by the average of
460            the reweighting factor on all configurations in weight.idl and not
461            on the configurations in obs[i].idl. Default False.
462        """
463        return reweight(weight, [self])[0]

Reweight the obs with a given reweighting factor.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
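A minimal sketch, assuming a hypothetical reweighting factor defined on the same configurations (and therefore on a superset of obs.idl):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(3)
    obs = pe.Obs([rng.normal(1.0, 0.1, 1000)], ['A'])
    w = pe.Obs([np.abs(rng.normal(1.0, 0.05, 1000))], ['A'])  # hypothetical weight

    obs_rw = obs.reweight(w)
    print(obs_rw.reweighted)  # True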
def is_zero_within_error(self, sigma=1):
465    def is_zero_within_error(self, sigma=1):
466        """Checks whether the observable is zero within 'sigma' standard errors.
467
468        Parameters
469        ----------
470        sigma : int
471            Number of standard errors used for the check.
472
473        Only works properly when the gamma method was run.
474        """
475        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
Only works properly when the gamma method was run.
def is_zero(self, atol=1e-10):
477    def is_zero(self, atol=1e-10):
478        """Checks whether the observable is zero within a given tolerance.
479
480        Parameters
481        ----------
482        atol : float
483            Absolute tolerance (for details see numpy documentation).
484        """
485        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
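Both checks in one sketch (synthetic data; is_zero compares value and deltas against an absolute tolerance, while is_zero_within_error compares against the standard error from the gamma method):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(5)
    a = pe.Obs([rng.normal(0.0, 0.1, 1000)], ['A'])
    a.gamma_method()

    print(a.is_zero_within_error())   # zero within 1 standard error?
    print(a.is_zero_within_error(2))  # zero within 2 standard errors?
    print((a - a).is_zero())          # True: value and deltas cancel exactly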
def plot_tauint(self, save=None):
487    def plot_tauint(self, save=None):
488        """Plot integrated autocorrelation time for each ensemble.
489
490        Parameters
491        ----------
492        save : str
493            saves the figure to a file named 'save' if specified.
494        """
495        if not hasattr(self, 'e_dvalue'):
496            raise Exception('Run the gamma method first.')
497
498        for e, e_name in enumerate(self.mc_names):
499            fig = plt.figure()
500            plt.xlabel(r'$W$')
501            plt.ylabel(r'$\tau_\mathrm{int}$')
502            length = int(len(self.e_n_tauint[e_name]))
503            if self.tau_exp[e_name] > 0:
504                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
505                x_help = np.arange(2 * self.tau_exp[e_name])
506                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
507                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
508                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
509                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
510                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
511                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
512                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
513            else:
514                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
515                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
516
517            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
518            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
519            plt.legend()
520            plt.xlim(-0.5, xmax)
521            ylim = plt.ylim()
522            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
523            plt.draw()
524            if save:
525                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
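A minimal plotting sketch (the figures are drawn with matplotlib, so depending on the backend a final plt.show() may be required):

    import numpy as np
    import pyerrors as pe
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(2)
    obs = pe.Obs([rng.normal(1.0, 0.1, 2000)], ['A'])
    obs.gamma_method()  # required before any of the error plots

    obs.plot_tauint()   # tau_int as a function of the window size W
    obs.plot_rho()      # normalized autocorrelation function
    plt.show()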
def plot_rho(self, save=None):
527    def plot_rho(self, save=None):
528        """Plot the normalized autocorrelation function for each ensemble.
529
530        Parameters
531        ----------
532        save : str
533            saves the figure to a file named 'save' if specified.
534        """
535        if not hasattr(self, 'e_dvalue'):
536            raise Exception('Run the gamma method first.')
537        for e, e_name in enumerate(self.mc_names):
538            fig = plt.figure()
539            plt.xlabel('W')
540            plt.ylabel('rho')
541            length = int(len(self.e_drho[e_name]))
542            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
543            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
544            if self.tau_exp[e_name] > 0:
545                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
546                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
547                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
548                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
549            else:
550                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
551                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
552            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
553            plt.xlim(-0.5, xmax)
554            plt.draw()
555            if save:
556                fig.savefig(save + "_" + str(e))

Plot the normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def plot_rep_dist(self):
558    def plot_rep_dist(self):
559        """Plot replica distribution for each ensemble with more than one replicum."""
560        if not hasattr(self, 'e_dvalue'):
561            raise Exception('Run the gamma method first.')
562        for e, e_name in enumerate(self.mc_names):
563            if len(self.e_content[e_name]) == 1:
564                print('No replica distribution for a single replicum (', e_name, ')')
565                continue
566            r_length = []
567            sub_r_mean = 0
568            for r, r_name in enumerate(self.e_content[e_name]):
569                r_length.append(len(self.deltas[r_name]))
570                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
571            e_N = np.sum(r_length)
572            sub_r_mean /= e_N
573            arr = np.zeros(len(self.e_content[e_name]))
574            for r, r_name in enumerate(self.e_content[e_name]):
575                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
576            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
577            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
578            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
580    def plot_history(self, expand=True):
581        """Plot derived Monte Carlo history for each ensemble
582
583        Parameters
584        ----------
585        expand : bool
586            show expanded history for irregular Monte Carlo chains (default: True).
587        """
588        for e, e_name in enumerate(self.mc_names):
589            plt.figure()
590            r_length = []
591            tmp = []
592            tmp_expanded = []
593            for r, r_name in enumerate(self.e_content[e_name]):
594                tmp.append(self.deltas[r_name] + self.r_values[r_name])
595                if expand:
596                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
597                    r_length.append(len(tmp_expanded[-1]))
598                else:
599                    r_length.append(len(tmp[-1]))
600            e_N = np.sum(r_length)
601            x = np.arange(e_N)
602            y_test = np.concatenate(tmp, axis=0)
603            if expand:
604                y = np.concatenate(tmp_expanded, axis=0)
605            else:
606                y = y_test
607            plt.errorbar(x, y, fmt='.', markersize=3)
608            plt.xlim(-0.5, e_N - 0.5)
609            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
610            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
612    def plot_piechart(self, save=None):
613        """Plot piechart which shows the fractional contribution of each
614        ensemble to the error and returns a dictionary containing the fractions.
615
616        Parameters
617        ----------
618        save : str
619            saves the figure to a file named 'save' if specified.
620        """
621        if not hasattr(self, 'e_dvalue'):
622            raise Exception('Run the gamma method first.')
623        if np.isclose(0.0, self._dvalue, atol=1e-15):
624            raise Exception('Error is 0.0')
625        labels = self.e_names
626        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
627        fig1, ax1 = plt.subplots()
628        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
629        ax1.axis('equal')
630        plt.draw()
631        if save:
632            fig1.savefig(save)
633
634        return dict(zip(labels, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
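A sketch of inspecting the error budget of a derived quantity (two hypothetical independent ensembles 'A' and 'B'; the returned fractions of the squared error sum to one):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(4)
    a = pe.Obs([rng.normal(1.0, 0.1, 1000)], ['A'])
    b = pe.Obs([rng.normal(2.0, 0.2, 1000)], ['B'])
    c = a + b
    c.gamma_method()

    fractions = c.plot_piechart()
    print(fractions)  # {'A': ..., 'B': ...}, fractions of the squared error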
def dump(self, filename, datatype='json.gz', description='', **kwargs):
636    def dump(self, filename, datatype="json.gz", description="", **kwargs):
637        """Dump the Obs to a file 'filename' of chosen format.
638
639        Parameters
640        ----------
641        filename : str
642            name of the file to be saved.
643        datatype : str
644            Format of the exported file. Supported formats include
645            "json.gz" and "pickle"
646        description : str
647            Description for output file, only relevant for json.gz format.
648        path : str
649            specifies a custom path for the file (default '.')
650        """
651        if 'path' in kwargs:
652            file_name = kwargs.get('path') + '/' + filename
653        else:
654            file_name = filename
655
656        if datatype == "json.gz":
657            from .input.json import dump_to_json
658            dump_to_json([self], file_name, description=description)
659        elif datatype == "pickle":
660            with open(file_name + '.p', 'wb') as fb:
661                pickle.dump(self, fb)
662        else:
663            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'filename' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
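A minimal sketch (file names are arbitrary; the format-specific extension is appended automatically):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(6)
    obs = pe.Obs([rng.normal(1.0, 0.1, 500)], ['A'])

    obs.dump('my_obs')                     # writes my_obs.json.gz
    obs.dump('my_obs', datatype='pickle')  # writes my_obs.p
    # The json.gz file can be read back with pe.input.json.load_json('my_obs').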
def export_jackknife(self):
665    def export_jackknife(self):
666        """Export jackknife samples from the Obs
667
668        Returns
669        -------
670        numpy.ndarray
671            Returns a numpy array of length N + 1 where N is the number of samples
672            for the given ensemble and replicum. The zeroth entry of the array contains
673            the mean value of the Obs, entries 1 to N contain the N jackknife samples
674            derived from the Obs. The current implementation only works for observables
675            defined on exactly one ensemble and replicum. The derived jackknife samples
676            should agree with samples from a full jackknife analysis up to O(1/N).
677        """
678
679        if len(self.names) != 1:
680            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
681
682        name = self.names[0]
683        full_data = self.deltas[name] + self.r_values[name]
684        n = full_data.size
685        mean = self.value
686        tmp_jacks = np.zeros(n + 1)
687        tmp_jacks[0] = mean
688        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
689        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
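As a consistency sketch, the naive (autocorrelation-free) jackknife error reconstructed from these samples should agree with gamma_method(S=0):

    import numpy as np
    import pyerrors as pe

    rng = np.random.default_rng(8)
    obs = pe.Obs([rng.normal(1.0, 0.1, 1000)], ['A'])

    jacks = obs.export_jackknife()
    n = len(jacks) - 1
    # Standard jackknife variance: (n - 1) / n * sum((theta_i - theta_bar)^2)
    err = np.sqrt((n - 1) / n * np.sum((jacks[1:] - np.mean(jacks[1:])) ** 2))

    obs.gamma_method(S=0)   # error estimate without autocorrelation
    print(err, obs.dvalue)  # the two estimates should agree closely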
def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
691    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
692        """Export bootstrap samples from the Obs
693
694        Parameters
695        ----------
696        samples : int
697            Number of bootstrap samples to generate.
698        random_numbers : np.ndarray
699            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
700            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
701        save_rng : str
702            Save the random numbers to a file if a path is specified.
703
704        Returns
705        -------
706        numpy.ndarray
707            Returns a numpy array of length N + 1 where N is the number of
708            bootstrap samples. The zeroth entry of the array contains
709            the mean value of the Obs, entries 1 to N contain the N bootstrap samples
710            derived from the Obs. The current implementation only works for observables
711            defined on exactly one ensemble and replicum. The derived bootstrap samples
712            should agree with samples from a full bootstrap analysis up to O(1/N).
713        """
714        if len(self.names) != 1:
715            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")
716
717        name = self.names[0]
718        length = self.N
719
720        if random_numbers is None:
721            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
722            rng = np.random.default_rng(seed)
723            random_numbers = rng.integers(0, length, size=(samples, length))
724
725        if save_rng is not None:
726            np.savetxt(save_rng, random_numbers, fmt='%i')
727
728        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
729        ret = np.zeros(samples + 1)
730        ret[0] = self.value
731        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
732        return ret

Export bootstrap samples from the Obs

Parameters
  • samples (int): Number of bootstrap samples to generate.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
  • save_rng (str): Save the random numbers to a file if a path is specified.
Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of bootstrap samples. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
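Example
A minimal sketch with synthetic data; ensemble name and sample count are illustrative. Without explicit random_numbers the result is reproducible because the seed is derived from the ensemble name.
>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ensemble1'])
>>> boots = obs.export_bootstrap(samples=1000)
>>> boots.shape  # mean value plus 1000 bootstrap samples
(1001,)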
def sqrt(self):
871    def sqrt(self):
872        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
874    def log(self):
875        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
877    def exp(self):
878        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
880    def sin(self):
881        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
883    def cos(self):
884        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
886    def tan(self):
887        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
889    def arcsin(self):
890        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
892    def arccos(self):
893        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
895    def arctan(self):
896        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
898    def sinh(self):
899        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
901    def cosh(self):
902        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
904    def tanh(self):
905        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
907    def arcsinh(self):
908        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
910    def arccosh(self):
911        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
913    def arctanh(self):
914        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Further instance attributes set by the gamma_method: S, N_sigma, tau_exp, e_dvalue, e_ddvalue, e_tauint, e_dtauint, e_rho, e_drho, e_n_tauint, e_n_dtauint, e_windowsize.
class CObs:
 917class CObs:
 918    """Class for a complex valued observable."""
 919    __slots__ = ['_real', '_imag', 'tag']
 920
 921    def __init__(self, real, imag=0.0):
 922        self._real = real
 923        self._imag = imag
 924        self.tag = None
 925
 926    @property
 927    def real(self):
 928        return self._real
 929
 930    @property
 931    def imag(self):
 932        return self._imag
 933
 934    def gamma_method(self, **kwargs):
 935        """Executes the gamma_method for the real and the imaginary part."""
 936        if isinstance(self.real, Obs):
 937            self.real.gamma_method(**kwargs)
 938        if isinstance(self.imag, Obs):
 939            self.imag.gamma_method(**kwargs)
 940
 941    def is_zero(self):
 942        """Checks whether both real and imaginary part are zero within machine precision."""
 943        return self.real == 0.0 and self.imag == 0.0
 944
 945    def conjugate(self):
 946        return CObs(self.real, -self.imag)
 947
 948    def __add__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return other + self
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            return CObs(self.real + other.real,
 953                        self.imag + other.imag)
 954        else:
 955            return CObs(self.real + other, self.imag)
 956
 957    def __radd__(self, y):
 958        return self + y
 959
 960    def __sub__(self, other):
 961        if isinstance(other, np.ndarray):
 962            return -1 * (other - self)
 963        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 964            return CObs(self.real - other.real, self.imag - other.imag)
 965        else:
 966            return CObs(self.real - other, self.imag)
 967
 968    def __rsub__(self, other):
 969        return -1 * (self - other)
 970
 971    def __mul__(self, other):
 972        if isinstance(other, np.ndarray):
 973            return other * self
 974        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 975            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 976                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 977                                               [self.real, other.real, self.imag, other.imag],
 978                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 979                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 980                                               [self.real, other.real, self.imag, other.imag],
 981                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 982            elif getattr(other, 'imag', 0) != 0:
 983                return CObs(self.real * other.real - self.imag * other.imag,
 984                            self.imag * other.real + self.real * other.imag)
 985            else:
 986                return CObs(self.real * other.real, self.imag * other.real)
 987        else:
 988            return CObs(self.real * other, self.imag * other)
 989
 990    def __rmul__(self, other):
 991        return self * other
 992
 993    def __truediv__(self, other):
 994        if isinstance(other, np.ndarray):
 995            return 1 / (other / self)
 996        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 997            r = other.real ** 2 + other.imag ** 2
 998            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 999        else:
1000            return CObs(self.real / other, self.imag / other)
1001
1002    def __rtruediv__(self, other):
1003        r = self.real ** 2 + self.imag ** 2
1004        if hasattr(other, 'real') and hasattr(other, 'imag'):
1005            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1006        else:
1007            return CObs(self.real * other / r, -self.imag * other / r)
1008
1009    def __abs__(self):
1010        return np.sqrt(self.real**2 + self.imag**2)
1011
1012    def __pos__(self):
1013        return self
1014
1015    def __neg__(self):
1016        return -1 * self
1017
1018    def __eq__(self, other):
1019        return self.real == other.real and self.imag == other.imag
1020
1021    def __str__(self):
1022        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1023
1024    def __repr__(self):
1025        return 'CObs[' + str(self) + ']'
1026
1027    def __format__(self, format_type):
1028        if format_type == "":
1029            significance = 2
1030            format_type = "2"
1031        else:
1032            significance = int(float(format_type.replace("+", "").replace("-", "")))
1033        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0)
921    def __init__(self, real, imag=0.0):
922        self._real = real
923        self._imag = imag
924        self.tag = None
tag
real
926    @property
927    def real(self):
928        return self._real
imag
930    @property
931    def imag(self):
932        return self._imag
def gamma_method(self, **kwargs):
934    def gamma_method(self, **kwargs):
935        """Executes the gamma_method for the real and the imaginary part."""
936        if isinstance(self.real, Obs):
937            self.real.gamma_method(**kwargs)
938        if isinstance(self.imag, Obs):
939            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
941    def is_zero(self):
942        """Checks whether both real and imaginary part are zero within machine precision."""
943        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
945    def conjugate(self):
946        return CObs(self.real, -self.imag)
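Example
A minimal sketch of complex arithmetic with CObs; the values, errors and the ensemble name are illustrative.
>>> import pyerrors as pe
>>> re = pe.pseudo_Obs(1.0, 0.1, 'ens')
>>> im = pe.pseudo_Obs(0.5, 0.05, 'ens')
>>> c = pe.CObs(re, im)
>>> prod = c * c.conjugate()  # |c|^2, imaginary part compatible with zero
>>> prod.gamma_method()       # error analysis for real and imaginary part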
def gamma_method(x, **kwargs):
1036def gamma_method(x, **kwargs):
1037    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1038
1039    See docstring of pe.Obs.gamma_method for details.
1040    """
1041    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs):
1036def gamma_method(x, **kwargs):
1037    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.
1038
1039    See docstring of pe.Obs.gamma_method for details.
1040    """
1041    return np.vectorize(lambda o: o.gm(**kwargs))(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs. gm is a short alias for gamma_method.

See docstring of pe.Obs.gamma_method for details.
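Example
A minimal sketch; values and the ensemble name are illustrative.
>>> import pyerrors as pe
>>> obs_list = [pe.pseudo_Obs(v, 0.1, 'ens') for v in [0.5, 1.0, 1.5]]
>>> pe.gm(obs_list)  # applies the gamma_method to every element in place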

def derived_observable(func, data, array_mode=False, **kwargs):
1171def derived_observable(func, data, array_mode=False, **kwargs):
1172    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1173
1174    Parameters
1175    ----------
1176    func : object
1177        arbitrary function of the form func(data, **kwargs). For the
1178        automatic differentiation to work, all numpy functions have to have
1179        the autograd wrapper (use 'import autograd.numpy as anp').
1180    data : list
1181        list of Obs, e.g. [obs1, obs2, obs3].
1182    num_grad : bool
1183        if True, numerical derivatives are used instead of autograd
1184        (default False). To control the numerical differentiation the
1185        kwargs of numdifftools.step_generators.MaxStepGenerator
1186        can be used.
1187    man_grad : list
1188        manually supply a list or an array which contains the jacobian
1189        of func. Use cautiously, supplying the wrong derivative will
1190        not be intercepted.
1191
1192    Notes
1193    -----
1194    For simple mathematical operations it can be practical to use anonymous
1195    functions. For the ratio of two observables one can e.g. use
1196
1197    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1198    """
1199
1200    data = np.asarray(data)
1201    raveled_data = data.ravel()
1202
1203    # Workaround for matrix operations containing non Obs data
1204    if not all(isinstance(x, Obs) for x in raveled_data):
1205        for i in range(len(raveled_data)):
1206            if isinstance(raveled_data[i], (int, float)):
1207                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1208
1209    allcov = {}
1210    for o in raveled_data:
1211        for name in o.cov_names:
1212            if name in allcov:
1213                if not np.allclose(allcov[name], o.covobs[name].cov):
1214                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1215            else:
1216                allcov[name] = o.covobs[name].cov
1217
1218    n_obs = len(raveled_data)
1219    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1220    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1221    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1222
1223    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1224
1225    if data.ndim == 1:
1226        values = np.array([o.value for o in data])
1227    else:
1228        values = np.vectorize(lambda x: x.value)(data)
1229
1230    new_values = func(values, **kwargs)
1231
1232    multi = int(isinstance(new_values, np.ndarray))
1233
1234    new_r_values = {}
1235    new_idl_d = {}
1236    for name in new_sample_names:
1237        idl = []
1238        tmp_values = np.zeros(n_obs)
1239        for i, item in enumerate(raveled_data):
1240            tmp_values[i] = item.r_values.get(name, item.value)
1241            tmp_idl = item.idl.get(name)
1242            if tmp_idl is not None:
1243                idl.append(tmp_idl)
1244        if multi > 0:
1245            tmp_values = np.array(tmp_values).reshape(data.shape)
1246        new_r_values[name] = func(tmp_values, **kwargs)
1247        new_idl_d[name] = _merge_idx(idl)
1248
1249    def _compute_scalefactor_missing_rep(obs):
1250        """
1251        Computes the scale factor that is to be multiplied with the deltas
1252        in the case where Obs with different subsets of replica are merged.
1253        Returns a dictionary with the scale factor for each Monte Carlo name.
1254
1255        Parameters
1256        ----------
1257        obs : Obs
1258            The observable corresponding to the deltas that are to be scaled
1259        """
1260        scalef_d = {}
1261        for mc_name in obs.mc_names:
1262            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
1263            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
1264            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
1265                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
1266        return scalef_d
1267
1268    if 'man_grad' in kwargs:
1269        deriv = np.asarray(kwargs.get('man_grad'))
1270        if new_values.shape + data.shape != deriv.shape:
1271            raise Exception('Manual derivative does not have correct shape.')
1272    elif kwargs.get('num_grad') is True:
1273        if multi > 0:
1274            raise Exception('Multi mode currently not supported for numerical derivative')
1275        options = {
1276            'base_step': 0.1,
1277            'step_ratio': 2.5}
1278        for key in options.keys():
1279            kwarg = kwargs.get(key)
1280            if kwarg is not None:
1281                options[key] = kwarg
1282        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1283        if tmp_df.size == 1:
1284            deriv = np.array([tmp_df.real])
1285        else:
1286            deriv = tmp_df.real
1287    else:
1288        deriv = jacobian(func)(values, **kwargs)
1289
1290    final_result = np.zeros(new_values.shape, dtype=object)
1291
1292    if array_mode is True:
1293
1294        class _Zero_grad():
1295            def __init__(self, N):
1296                self.grad = np.zeros((N, 1))
1297
1298        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1299        d_extracted = {}
1300        g_extracted = {}
1301        for name in new_sample_names:
1302            d_extracted[name] = []
1303            ens_length = len(new_idl_d[name])
1304            for i_dat, dat in enumerate(data):
1305                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1306        for name in new_cov_names:
1307            g_extracted[name] = []
1308            zero_grad = _Zero_grad(new_covobs_lengths[name])
1309            for i_dat, dat in enumerate(data):
1310                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1311
1312    for i_val, new_val in np.ndenumerate(new_values):
1313        new_deltas = {}
1314        new_grad = {}
1315        if array_mode is True:
1316            for name in new_sample_names:
1317                ens_length = d_extracted[name][0].shape[-1]
1318                new_deltas[name] = np.zeros(ens_length)
1319                for i_dat, dat in enumerate(d_extracted[name]):
1320                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1321            for name in new_cov_names:
1322                new_grad[name] = 0
1323                for i_dat, dat in enumerate(g_extracted[name]):
1324                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1325        else:
1326            for j_obs, obs in np.ndenumerate(data):
1327                scalef_d = _compute_scalefactor_missing_rep(obs)
1328                for name in obs.names:
1329                    if name in obs.cov_names:
1330                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1331                    else:
1332                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))
1333
1334        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1335
1336        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1337            raise Exception('The same name has been used for deltas and covobs!')
1338        new_samples = []
1339        new_means = []
1340        new_idl = []
1341        new_names_obs = []
1342        for name in new_names:
1343            if name not in new_covobs:
1344                new_samples.append(new_deltas[name])
1345                new_idl.append(new_idl_d[name])
1346                new_means.append(new_r_values[name][i_val])
1347                new_names_obs.append(name)
1348        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1349        for name in new_covobs:
1350            final_result[i_val].names.append(name)
1351        final_result[i_val]._covobs = new_covobs
1352        final_result[i_val]._value = new_val
1353        final_result[i_val].reweighted = reweighted
1354
1355    if multi == 0:
1356        final_result = final_result.item()
1357
1358    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
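Example
A minimal sketch; the observables are synthetic and live on a common, illustrative ensemble 'ens'. Note the autograd wrapper anp for non-trivial functions.
>>> import autograd.numpy as anp
>>> import pyerrors as pe
>>> obs1 = pe.pseudo_Obs(2.0, 0.1, 'ens')
>>> obs2 = pe.pseudo_Obs(1.0, 0.05, 'ens')
>>> ratio = pe.derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
>>> log_obs = pe.derived_observable(lambda x: anp.log(x[0]), [obs1])
>>> ratio.gamma_method()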

def reweight(weight, obs, **kwargs):
1390def reweight(weight, obs, **kwargs):
1391    """Reweight a list of observables.
1392
1393    Parameters
1394    ----------
1395    weight : Obs
1396        Reweighting factor. An Observable that has to be defined on a superset of the
1397        configurations in obs[i].idl for all i.
1398    obs : list
1399        list of Obs, e.g. [obs1, obs2, obs3].
1400    all_configs : bool
1401        if True, the reweighted observables are normalized by the average of
1402        the reweighting factor on all configurations in weight.idl and not
1403        on the configurations in obs[i].idl. Default False.
1404    """
1405    result = []
1406    for i in range(len(obs)):
1407        if len(obs[i].cov_names):
1408            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1409        if not set(obs[i].names).issubset(weight.names):
1410            raise Exception('Error: Ensembles do not fit')
1411        for name in obs[i].names:
1412            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1413                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1414        new_samples = []
1415        w_deltas = {}
1416        for name in sorted(obs[i].names):
1417            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1418            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1419        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1420
1421        if kwargs.get('all_configs'):
1422            new_weight = weight
1423        else:
1424            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1425
1426        result.append(tmp_obs / new_weight)
1427        result[-1].reweighted = True
1428
1429    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
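Example
A minimal sketch with synthetic data; weight and observable are defined on the same configurations of the illustrative ensemble 'ens'.
>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens'])
>>> weight = pe.Obs([np.random.uniform(0.8, 1.2, 100)], ['ens'])
>>> rw_obs = pe.reweight(weight, [obs])[0]  # returns a list, one Obs per input
>>> rw_obs.reweighted
True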
def correlate(obs_a, obs_b):
1432def correlate(obs_a, obs_b):
1433    """Correlate two observables.
1434
1435    Parameters
1436    ----------
1437    obs_a : Obs
1438        First observable
1439    obs_b : Obs
1440        Second observable
1441
1442    Notes
1443    -----
1444    Keep in mind to only correlate primary observables which have not been reweighted
1445    yet. The reweighting has to be applied after correlating the observables.
1446    Currently only works if ensembles are identical (this is not strictly necessary).
1447    """
1448
1449    if sorted(obs_a.names) != sorted(obs_b.names):
1450        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1451    if len(obs_a.cov_names) or len(obs_b.cov_names):
1452        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1453    for name in obs_a.names:
1454        if obs_a.shape[name] != obs_b.shape[name]:
1455            raise Exception('Shapes of ensemble', name, 'do not fit')
1456        if obs_a.idl[name] != obs_b.idl[name]:
1457            raise Exception('idl of ensemble', name, 'do not fit')
1458
1459    if obs_a.reweighted is True:
1460        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1461    if obs_b.reweighted is True:
1462        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1463
1464    new_samples = []
1465    new_idl = []
1466    for name in sorted(obs_a.names):
1467        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1468        new_idl.append(obs_a.idl[name])
1469
1470    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1471    o.reweighted = obs_a.reweighted or obs_b.reweighted
1472    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
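Example
A minimal sketch with synthetic data; both observables share the same configurations on the illustrative ensemble 'ens'.
>>> import numpy as np
>>> import pyerrors as pe
>>> obs_a = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens'])
>>> obs_b = pe.Obs([np.random.normal(2.0, 0.2, 100)], ['ens'])
>>> ab = pe.correlate(obs_a, obs_b)  # configuration-wise product a*b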

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1475def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1476    r'''Calculates the error covariance matrix of a set of observables.
1477
1478    WARNING: This function should be used with care, especially for observables with support on multiple
1479             ensembles with differing autocorrelations. See the notes below for details.
1480
1481    The gamma method has to be applied first to all observables.
1482
1483    Parameters
1484    ----------
1485    obs : list or numpy.ndarray
1486        List or one dimensional array of Obs
1487    visualize : bool
1488        If True plots the corresponding normalized correlation matrix (default False).
1489    correlation : bool
1490        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1491    smooth : None or int
1492        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1493        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1494        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1495        small ones.
1496
1497    Notes
1498    -----
1499    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1500    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1501    in the absence of autocorrelation.
1502    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1503    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1504    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1505    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1506    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1507    '''
1508
1509    length = len(obs)
1510
1511    max_samples = np.max([o.N for o in obs])
1512    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1513        warnings.warn(f"The dimension of the covariance matrix ({length}) is greater than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1514
1515    cov = np.zeros((length, length))
1516    for i in range(length):
1517        for j in range(i, length):
1518            cov[i, j] = _covariance_element(obs[i], obs[j])
1519    cov = cov + cov.T - np.diag(np.diag(cov))
1520
1521    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1522
1523    if isinstance(smooth, int):
1524        corr = _smooth_eigenvalues(corr, smooth)
1525
1526    if visualize:
1527        plt.matshow(corr, vmin=-1, vmax=1)
1528        plt.set_cmap('RdBu')
1529        plt.colorbar()
1530        plt.draw()
1531
1532    if correlation is True:
1533        return corr
1534
1535    errors = [o.dvalue for o in obs]
1536    cov = np.diag(errors) @ corr @ np.diag(errors)
1537
1538    eigenvalues = np.linalg.eigh(cov)[0]
1539    if not np.all(eigenvalues >= 0):
1540        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1541
1542    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.
The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements:
$$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
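Example
A minimal sketch with two correlated synthetic observables; the ensemble name is illustrative. The gamma method has to be run on every observable before covariance is called.
>>> import numpy as np
>>> import pyerrors as pe
>>> data = np.random.normal(1.0, 0.1, 100)
>>> obs1 = pe.Obs([data], ['ens'])
>>> obs2 = pe.Obs([data + np.random.normal(0.0, 0.05, 100)], ['ens'])
>>> obs1.gamma_method()
>>> obs2.gamma_method()
>>> cov = pe.covariance([obs1, obs2])                     # 2x2 error covariance matrix
>>> corr = pe.covariance([obs1, obs2], correlation=True)  # normalized correlation matrix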

def invert_corr_cov_cholesky(corr, inverrdiag):
1545def invert_corr_cov_cholesky(corr, inverrdiag):
1546    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
1547       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
1548
1549    Parameters
1550    ----------
1551    corr : np.ndarray
1552           correlation matrix
1553    inverrdiag : np.ndarray
1554              diagonal matrix, the entries are the inverse errors of the data points considered
1555    """
1556
1557    condn = np.linalg.cond(corr)
1558    if condn > 0.1 / np.finfo(float).eps:
1559        raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
1560    if condn > 1e13:
1561        warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
1562    chol = np.linalg.cholesky(corr)
1563    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
1564
1565    return chol_inv

Constructs a lower triangular matrix chol via the Cholesky decomposition of the correlation matrix corr and then returns the inverse covariance matrix chol_inv as a lower triangular matrix by solving chol * x = inverrdiag.

Parameters
  • corr (np.ndarray): correlation matrix
  • inverrdiag (np.ndarray): diagonal matrix, the entries are the inverse errors of the data points considered
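Example
A minimal sketch with a hand-made 2x2 correlation matrix and illustrative errors 0.1 and 0.2.
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1.0, 0.5], [0.5, 1.0]])
>>> inverrdiag = np.diag(1 / np.array([0.1, 0.2]))  # inverse errors on the diagonal
>>> chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag)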
def sort_corr(corr, kl, yd):
1568def sort_corr(corr, kl, yd):
1569    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
1570
1571    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
1572    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
1573    that the correlation matrix is based on.
1574    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
1575    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
1576    to the y data `yd` when arranged in an alphabetical order by its keys.
1577
1578    Parameters
1579    ----------
1580    corr : np.ndarray
1581        A square correlation matrix constructed using the order of the y data specified by `kl`.
1582        The dimensions of `corr` should match the total number of y data points in `yd` combined.
1583    kl : list of str
1584        A list of keys that denotes the order in which the y data from `yd` was used to build the
1585        input correlation matrix `corr`.
1586    yd : dict of list
1587        A dictionary where each key corresponds to a unique identifier, and its value is a list of
1588        y data points. The total number of y data points across all keys must match the dimensions
1589        of `corr`. The lists in the dictionary can be lists of Obs.
1590
1591    Returns
1592    -------
1593    np.ndarray
1594        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
1595
1596    Example
1597    -------
1598    >>> import numpy as np
1599    >>> import pyerrors as pe
1600    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
1601    >>> kl = ['b', 'a']
1602    >>> yd = {'a': [1, 2], 'b': [3]}
1603    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
1604    >>> sorted_corr
1605    array([[1. , 0.3, 0.4],
1606           [0.3, 1. , 0.2],
1607           [0.4, 0.2, 1. ]])
1608
1609    """
1610    kl_sorted = sorted(kl)
1611
1612    posd = {}
1613    ofs = 0
1614    for ki, k in enumerate(kl):
1615        posd[k] = [i + ofs for i in range(len(yd[k]))]
1616        ofs += len(posd[k])
1617
1618    mapping = []
1619    for k in kl_sorted:
1620        for i in range(len(yd[k])):
1621            mapping.append(posd[k][i])
1622
1623    corr_sorted = np.zeros_like(corr)
1624    for i in range(corr.shape[0]):
1625        for j in range(corr.shape[0]):
1626            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
1627
1628    return corr_sorted

Reorders a correlation matrix to match the alphabetical order of its underlying y data.

The ordering of the input correlation matrix corr is given by the list of keys kl. The input dictionary yd (with the same keys kl) must contain the corresponding y data that the correlation matrix is based on. This function sorts the list of keys kl alphabetically and sorts the matrix corr according to this alphabetical order such that the sorted matrix corr_sorted corresponds to the y data yd when arranged in an alphabetical order by its keys.

Parameters
  • corr (np.ndarray): A square correlation matrix constructed using the order of the y data specified by kl. The dimensions of corr should match the total number of y data points in yd combined.
  • kl (list of str): A list of keys that denotes the order in which the y data from yd was used to build the input correlation matrix corr.
  • yd (dict of list): A dictionary where each key corresponds to a unique identifier, and its value is a list of y data points. The total number of y data points across all keys must match the dimensions of corr. The lists in the dictionary can be lists of Obs.
Returns
  • np.ndarray: A new, sorted correlation matrix that corresponds to the y data from yd when arranged alphabetically by its keys.
Example
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> sorted_corr
array([[1. , 0.3, 0.4],
       [0.3, 1. , 0.2],
       [0.4, 0.2, 1. ]])
def import_jackknife(jacks, name, idl=None):
1708def import_jackknife(jacks, name, idl=None):
1709    """Imports jackknife samples and returns an Obs
1710
1711    Parameters
1712    ----------
1713    jacks : numpy.ndarray
1714        numpy array containing the mean value as zeroth entry and
1715        the N jackknife samples as first to Nth entry.
1716    name : str
1717        name of the ensemble the samples are defined on.
1718    """
1719    length = len(jacks) - 1
1720    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1721    samples = jacks[1:] @ prj
1722    mean = np.mean(samples)
1723    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1724    new_obs._value = jacks[0]
1725    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
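Example
A minimal round-trip sketch with synthetic data; exporting and re-importing jackknife samples reproduces the original observable.
>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens'])
>>> jacks = obs.export_jackknife()
>>> reconstructed = pe.import_jackknife(jacks, 'ens')
>>> np.isclose(reconstructed.value, obs.value)
True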
def import_bootstrap(boots, name, random_numbers):
1728def import_bootstrap(boots, name, random_numbers):
1729    """Imports bootstrap samples and returns an Obs
1730
1731    Parameters
1732    ----------
1733    boots : numpy.ndarray
1734        numpy array containing the mean value as zeroth entry and
1735        the N bootstrap samples as first to Nth entry.
1736    name : str
1737        name of the ensemble the samples are defined on.
1738    random_numbers : np.ndarray
1739        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
1740        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
1741        chain to be reconstructed.
1742    """
1743    samples, length = random_numbers.shape
1744    if samples != len(boots) - 1:
1745        raise ValueError("Random numbers do not have the correct shape.")
1746
1747    if samples < length:
1748        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
1749
1750    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
1751
1752    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
1753    ret = Obs([samples], [name])
1754    ret._value = boots[0]
1755    return ret

Imports bootstrap samples and returns an Obs

Parameters
  • boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
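Example
A minimal round-trip sketch with synthetic data; the random numbers saved during export are required for the reconstruction, and there have to be at least as many bootstrap samples as configurations.
>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens'])
>>> boots = obs.export_bootstrap(samples=500, save_rng='rng.txt')
>>> rng = np.loadtxt('rng.txt', dtype=int)
>>> reconstructed = pe.import_bootstrap(boots, 'ens', rng)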
def merge_obs(list_of_obs):
1758def merge_obs(list_of_obs):
1759    """Combine all observables in list_of_obs into one new observable
1760
1761    Parameters
1762    ----------
1763    list_of_obs : list
1764        list of the Obs object to be combined
1765
1766    Notes
1767    -----
1768    It is not possible to combine obs which are based on the same replicum
1769    """
1770    replist = [item for obs in list_of_obs for item in obs.names]
1771    if (len(replist) == len(set(replist))) is False:
1772        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1773    if any([len(o.cov_names) for o in list_of_obs]):
1774        raise Exception('Not possible to merge data that contains covobs!')
1775    new_dict = {}
1776    idl_dict = {}
1777    for o in list_of_obs:
1778        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1779                        for key in set(o.deltas) | set(o.r_values)})
1780        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1781
1782    names = sorted(new_dict.keys())
1783    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1784    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1785    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum
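Example
A minimal sketch merging two replica of the same illustrative ensemble 'ens', labeled with the usual 'ensemble|replicum' convention.
>>> import numpy as np
>>> import pyerrors as pe
>>> rep1 = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens|r1'])
>>> rep2 = pe.Obs([np.random.normal(1.0, 0.1, 80)], ['ens|r2'])
>>> combined = pe.merge_obs([rep1, rep2])  # one Obs defined on both replica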

def cov_Obs(means, cov, name, grad=None):
1788def cov_Obs(means, cov, name, grad=None):
1789    """Create an Obs based on mean(s) and a covariance matrix
1790
1791    Parameters
1792    ----------
1793    means : list of floats or float
1794        N mean value(s) of the new Obs
1795    cov : list or array
1796        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1797    name : str
1798        identifier for the covariance matrix
1799    grad : list or array
1800        Gradient of the Covobs wrt. the means belonging to cov.
1801    """
1802
1803    def covobs_to_obs(co):
1804        """Make an Obs out of a Covobs
1805
1806        Parameters
1807        ----------
1808        co : Covobs
1809            Covobs to be embedded into the Obs
1810        """
1811        o = Obs([], [], means=[])
1812        o._value = co.value
1813        o.names.append(co.name)
1814        o._covobs[co.name] = co
1815        o._dvalue = np.sqrt(co.errsq())
1816        return o
1817
1818    ol = []
1819    if isinstance(means, (float, int)):
1820        means = [means]
1821
1822    for i in range(len(means)):
1823        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1824    if ol[0].covobs[name].N != len(means):
1825        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1826    if len(ol) == 1:
1827        return ol[0]
1828    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
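Example
A minimal sketch; values, covariances and names are illustrative. A scalar covariance is interpreted as a variance.
>>> import pyerrors as pe
>>> zfac = pe.cov_Obs(0.9, 0.02 ** 2, 'Z_factor')  # single mean with error 0.02
>>> means = [1.0, 2.0]
>>> cov = [[0.01, 0.002], [0.002, 0.04]]
>>> o1, o2 = pe.cov_Obs(means, cov, 'syst_cov')    # two correlated Obs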