pyerrors.obs

import warnings
import hashlib
import pickle
from math import gcd
from functools import reduce
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of the same length which
    contains the names of the ensembles/replica. Mathematical operations can
    be performed on instances. The result is another instance of Obs. The
    error of an instance can be computed with the gamma_method. The class
    also contains additional methods for output and visualization of the
    error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
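
    Examples
    --------
    A minimal sketch of how the per-ensemble defaults can be overridden
    (the ensemble name 'ens1' is purely illustrative):

    >>> Obs.S_dict['ens1'] = 3.0      # use S=3.0 for 'ens1' only
    >>> Obs.tau_exp_dict['ens1'] = 5  # attach an exponential tail for 'ens1'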
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
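
        Examples
        --------
        An illustrative construction from made-up data; the ensemble name
        'ens1' and the samples are invented for this sketch:

        >>> import numpy as np
        >>> samples = np.random.normal(1.0, 0.1, size=100)
        >>> my_obs = Obs([samples], ['ens1'])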
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
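
        Examples
        --------
        A short usage sketch; my_obs stands for any Obs instance:

        >>> my_obs.gamma_method()        # automatic windowing with S=2.0
        >>> my_obs.gamma_method(S=3.0)   # custom S for all ensembles
        >>> err = my_obs.dvalue          # error estimate is now available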
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method

    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                if isinstance(self.idl[e_content[e_name][0]], range):
                    gap = self.idl[e_content[e_name][0]].step
                else:
                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with a given reweighting factor.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly after the gamma method has been run.
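
        Examples
        --------
        Sketch of the intended call sequence (my_obs is any Obs instance):

        >>> my_obs.gamma_method()
        >>> compatible_with_zero = my_obs.is_zero_within_error(sigma=2)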
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            if specified, saves the figure to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            if specified, saves the figure to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot a pie chart which shows the fractional contribution of each
        ensemble to the total error and return a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            if specified, saves the figure to a file named 'save'.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'filename' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
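
        Examples
        --------
        Illustrative calls; the file names are made up:

        >>> my_obs.dump('my_obs')                     # json.gz is the default format
        >>> my_obs.dump('my_obs', datatype='pickle')  # '.p' is appended, see below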
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
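
        Examples
        --------
        A sketch of the jackknife relation implemented below; my_obs stands
        for an Obs defined on a single ensemble:

        >>> jacks = my_obs.export_jackknife()
        >>> mean = jacks[0]     # the mean value of the Obs
        >>> pseudo = jacks[1:]  # (N * mean - sample_i) / (N - 1)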
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex-valued observable."""
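
    # Illustrative usage sketch (obs_re and obs_im stand for Obs instances):
    #   c_obs = CObs(obs_re, obs_im)
    #   c_obs.gamma_method()   # runs the error analysis for both parts
    #   print(c_obs)           # e.g. (0.1986(45)+0.0083(12)j)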
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'


def _format_uncertainty(value, dvalue):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)."""
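    # Illustrative, hand-checked examples of the three branches below:
    #   _format_uncertainty(13.02, 0.45)  -> '13.02(45)'   (fexp < 0)
    #   _format_uncertainty(3.1, 1.2)     -> '3.1(1.2)'    (fexp == 0)
    #   _format_uncertainty(150.0, 17.0)  -> '150(17)'     (fexp > 0)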
    if dvalue == 0.0:
        return str(value)
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
    elif fexp == 0.0:
        return '{:.1f}({:1.1f})'.format(value, dvalue)
    else:
        return '{:.0f}({:2.0f})'.format(value, dvalue)


def _expand_deltas(deltas, idx, shape):
    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
       If idx is of type range, the deltas are not changed.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
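
    Examples
    --------
    A short, hand-checked example on an irregular idx:

    >>> _expand_deltas(np.array([1.0, 2.0]), [1, 4], 2)
    array([1., 0., 0., 2.])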
    """
    if isinstance(idx, range):
        return deltas
    else:
        ret = np.zeros(idx[-1] - idx[0] + 1)
        for i in range(shape):
            ret[idx[i] - idx[0]] = deltas[i]
        return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as a sorted list.

    Parameters
    ----------
    idl : list
        List of lists or ranges.
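
    Examples
    --------
    A hand-checked example with two overlapping lists:

    >>> _merge_idx([[1, 2, 3], [3, 4, 5]])
    [1, 2, 3, 4, 5]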
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = min([idx.start for idx in idl])
            idstop = max([idx.stop for idx in idl])
            idstep = min([idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set().union(*idl))


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as a sorted list.

    Parameters
    ----------
    idl : list
        List of lists or ranges.
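
    Examples
    --------
    A hand-checked example with two ranges sharing the same start:

    >>> _intersection_idx([range(1, 100), range(1, 100, 2)])
    range(1, 100, 2)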
    """

    def _lcm(*args):
        """Returns the lowest common multiple of args.

        From python 3.9 onwards the math library contains an lcm function."""
        return reduce(lambda a, b: a * b // gcd(a, b), args)

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = max([idx.start for idx in idl])
            idstop = min([idx.stop for idx in idl])
            idstep = _lcm(*[idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set.intersection(*[set(o) for o in idl]))


def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
       New, empty entries are filled by 0. The deltas are rescaled by the ratio
       len(new_idx) / len(idx).

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
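
    For functions beyond elementary arithmetic the autograd-wrapped numpy
    has to be used inside func, e.g. (a sketch, with obs1 and obs2 as above):

    new_obs = derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2])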
1130    """
1131
1132    data = np.asarray(data)
1133    raveled_data = data.ravel()
1134
1135    # Workaround for matrix operations containing non Obs data
1136    if not all(isinstance(x, Obs) for x in raveled_data):
1137        for i in range(len(raveled_data)):
1138            if isinstance(raveled_data[i], (int, float)):
1139                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1140
1141    allcov = {}
1142    for o in raveled_data:
1143        for name in o.cov_names:
1144            if name in allcov:
1145                if not np.allclose(allcov[name], o.covobs[name].cov):
1146                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1147            else:
1148                allcov[name] = o.covobs[name].cov
1149
1150    n_obs = len(raveled_data)
1151    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1152    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1153    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1154
1155    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1156
1157    if data.ndim == 1:
1158        values = np.array([o.value for o in data])
1159    else:
1160        values = np.vectorize(lambda x: x.value)(data)
1161
1162    new_values = func(values, **kwargs)
1163
1164    multi = int(isinstance(new_values, np.ndarray))
1165
1166    new_r_values = {}
1167    new_idl_d = {}
1168    for name in new_sample_names:
1169        idl = []
1170        tmp_values = np.zeros(n_obs)
1171        for i, item in enumerate(raveled_data):
1172            tmp_values[i] = item.r_values.get(name, item.value)
1173            tmp_idl = item.idl.get(name)
1174            if tmp_idl is not None:
1175                idl.append(tmp_idl)
1176        if multi > 0:
1177            tmp_values = np.array(tmp_values).reshape(data.shape)
1178        new_r_values[name] = func(tmp_values, **kwargs)
1179        new_idl_d[name] = _merge_idx(idl)
1180
1181    if 'man_grad' in kwargs:
1182        deriv = np.asarray(kwargs.get('man_grad'))
1183        if new_values.shape + data.shape != deriv.shape:
1184            raise Exception('Manual derivative does not have correct shape.')
1185    elif kwargs.get('num_grad') is True:
1186        if multi > 0:
1187            raise Exception('Multi mode currently not supported for numerical derivative')
1188        options = {
1189            'base_step': 0.1,
1190            'step_ratio': 2.5}
1191        for key in options.keys():
1192            kwarg = kwargs.get(key)
1193            if kwarg is not None:
1194                options[key] = kwarg
1195        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1196        if tmp_df.size == 1:
1197            deriv = np.array([tmp_df.real])
1198        else:
1199            deriv = tmp_df.real
1200    else:
1201        deriv = jacobian(func)(values, **kwargs)
1202
1203    final_result = np.zeros(new_values.shape, dtype=object)
1204
1205    if array_mode is True:
1206
1207        class _Zero_grad():
1208            def __init__(self, N):
1209                self.grad = np.zeros((N, 1))
1210
1211        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1212        d_extracted = {}
1213        g_extracted = {}
1214        for name in new_sample_names:
1215            d_extracted[name] = []
1216            ens_length = len(new_idl_d[name])
1217            for i_dat, dat in enumerate(data):
1218                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1219        for name in new_cov_names:
1220            g_extracted[name] = []
1221            zero_grad = _Zero_grad(new_covobs_lengths[name])
1222            for i_dat, dat in enumerate(data):
1223                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1224
1225    for i_val, new_val in np.ndenumerate(new_values):
1226        new_deltas = {}
1227        new_grad = {}
1228        if array_mode is True:
1229            for name in new_sample_names:
1230                ens_length = d_extracted[name][0].shape[-1]
1231                new_deltas[name] = np.zeros(ens_length)
1232                for i_dat, dat in enumerate(d_extracted[name]):
1233                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1234            for name in new_cov_names:
1235                new_grad[name] = 0
1236                for i_dat, dat in enumerate(g_extracted[name]):
1237                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1238        else:
1239            for j_obs, obs in np.ndenumerate(data):
1240                for name in obs.names:
1241                    if name in obs.cov_names:
1242                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1243                    else:
1244                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1245
1246        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1247
1248        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1249            raise Exception('The same name has been used for deltas and covobs!')
1250        new_samples = []
1251        new_means = []
1252        new_idl = []
1253        new_names_obs = []
1254        for name in new_names:
1255            if name not in new_covobs:
1256                new_samples.append(new_deltas[name])
1257                new_idl.append(new_idl_d[name])
1258                new_means.append(new_r_values[name][i_val])
1259                new_names_obs.append(name)
1260        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1261        for name in new_covobs:
1262            final_result[i_val].names.append(name)
1263        final_result[i_val]._covobs = new_covobs
1264        final_result[i_val]._value = new_val
1265        final_result[i_val].reweighted = reweighted
1266
1267    if multi == 0:
1268        final_result = final_result.item()
1269
1270    return final_result
1271
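A sketch of how derived_observable is typically called (the arithmetic overloads of Obs reduce to such calls); obs_a and obs_b stand for two existing Obs instances:

    # product of two observables with the gradient supplied by hand,
    # taking the man_grad branch above instead of automatic differentiation
    prod = derived_observable(lambda x, **kwargs: x[0] * x[1],
                              [obs_a, obs_b],
                              man_grad=[obs_b.value, obs_a.value])

    # the same quantity via autograd (the jacobian branch above)
    prod_ad = derived_observable(lambda x, **kwargs: x[0] * x[1], [obs_a, obs_b])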
1272
1273def _reduce_deltas(deltas, idx_old, idx_new):
1274    """Extract deltas defined on idx_old on all configs of idx_new.
1275
1276    Assumes that idx_old and idx_new are correctly defined idl, i.e., they
1277    are ordered in ascending order.
1278
1279    Parameters
1280    ----------
1281    deltas : list
1282        List of fluctuations
1283    idx_old : list
1284        List or range of configs on which the deltas are defined
1285    idx_new : list
1286        List of configs for which we want to extract the deltas.
1287        Has to be a subset of idx_old.
1288    """
1289    if len(deltas) != len(idx_old):
1290        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1291    if type(idx_old) is range and type(idx_new) is range:
1292        if idx_old == idx_new:
1293            return deltas
1294    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1295    try:
1296        g = groupby([idx_old, idx_new])
1297        if next(g, True) and not next(g, False):
1298            return deltas
1299    except Exception:
1300        pass
1301    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1302    if len(indices) < len(idx_new):
1303        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1304    return np.array(deltas)[indices]
1305
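A toy example of the reduction performed here, assuming fluctuations defined on configurations 1 to 6 of which only the even ones are kept:

    deltas = [10., 11., 12., 13., 14., 15.]
    _reduce_deltas(deltas, range(1, 7), [2, 4, 6])  # -> array([11., 13., 15.])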
1306
1307def reweight(weight, obs, **kwargs):
1308    """Reweight a list of observables.
1309
1310    Parameters
1311    ----------
1312    weight : Obs
1313        Reweighting factor. An Observable that has to be defined on a superset of the
1314        configurations in obs[i].idl for all i.
1315    obs : list
1316        list of Obs, e.g. [obs1, obs2, obs3].
1317    all_configs : bool
1318        if True, the reweighted observables are normalized by the average of
1319        the reweighting factor on all configurations in weight.idl and not
1320        on the configurations in obs[i].idl. Default False.
1321    """
1322    result = []
1323    for i in range(len(obs)):
1324        if len(obs[i].cov_names):
1325            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1326        if not set(obs[i].names).issubset(weight.names):
1327            raise Exception('Error: Ensembles do not fit')
1328        for name in obs[i].names:
1329            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1330                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1331        new_samples = []
1332        w_deltas = {}
1333        for name in sorted(obs[i].names):
1334            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1335            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1336        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1337
1338        if kwargs.get('all_configs'):
1339            new_weight = weight
1340        else:
1341            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1342
1343        result.append(tmp_obs / new_weight)
1344        result[-1].reweighted = True
1345
1346    return result
1347
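A minimal usage sketch with synthetic data; the ensemble name 'ensA' and the sample sizes are arbitrary choices:

    rng = np.random.default_rng(7)
    my_obs = Obs([rng.normal(1.0, 0.2, 1000)], ['ensA'])
    rw = Obs([rng.normal(1.0, 0.1, 1000)], ['ensA'])  # reweighting factor

    rw_obs = reweight(rw, [my_obs])[0]
    rw_obs.gamma_method()
    # normalize by the mean of rw on all configs in rw.idl instead
    rw_all = reweight(rw, [my_obs], all_configs=True)[0]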
1348
1349def correlate(obs_a, obs_b):
1350    """Correlate two observables.
1351
1352    Parameters
1353    ----------
1354    obs_a : Obs
1355        First observable
1356    obs_b : Obs
1357        Second observable
1358
1359    Notes
1360    -----
1361    Keep in mind that only primary observables which have not yet been reweighted
1362    should be correlated. The reweighting has to be applied after correlating the observables.
1363    Currently this only works if the ensembles of both observables are identical (this is not strictly necessary).
1364    """
1365
1366    if sorted(obs_a.names) != sorted(obs_b.names):
1367        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1368    if len(obs_a.cov_names) or len(obs_b.cov_names):
1369        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1370    for name in obs_a.names:
1371        if obs_a.shape[name] != obs_b.shape[name]:
1372            raise Exception(f'Shapes of ensemble {name} do not fit')
1373        if obs_a.idl[name] != obs_b.idl[name]:
1374            raise Exception(f'idl of ensemble {name} do not fit')
1375
1376    if obs_a.reweighted is True:
1377        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1378    if obs_b.reweighted is True:
1379        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1380
1381    new_samples = []
1382    new_idl = []
1383    for name in sorted(obs_a.names):
1384        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1385        new_idl.append(obs_a.idl[name])
1386
1387    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1388    o.reweighted = obs_a.reweighted or obs_b.reweighted
1389    return o
1390
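A usage sketch with two synthetic observables on the same configurations. Note that correlate builds its samples from the per-configuration products, so its mean estimates <ab>, whereas obs_a * obs_b only combines the two means:

    rng = np.random.default_rng(11)
    a = rng.normal(1.0, 0.3, 800)
    obs_a = Obs([a], ['ensA'])
    obs_b = Obs([a + rng.normal(0.0, 0.1, 800)], ['ensA'])  # correlated with obs_a

    ab = correlate(obs_a, obs_b)
    ab.gamma_method()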
1391
1392def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1393    r'''Calculates the error covariance matrix of a set of observables.
1394
1395    WARNING: This function should be used with care, especially for observables with support on multiple
1396             ensembles with differing autocorrelations. See the notes below for details.
1397
1398    The gamma method has to be applied first to all observables.
1399
1400    Parameters
1401    ----------
1402    obs : list or numpy.ndarray
1403        List or one dimensional array of Obs
1404    visualize : bool
1405        If True plots the corresponding normalized correlation matrix (default False).
1406    correlation : bool
1407        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1408    smooth : None or int
1409        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1, the eigenvalue
1410        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the
1411        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid
1412        extremely small ones.
1413
1414    Notes
1415    -----
1416    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1417    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1418    in the absence of autocorrelation.
1419    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1420    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1421    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1422    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1423    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1424    '''
1425
1426    length = len(obs)
1427
1428    max_samples = np.max([o.N for o in obs])
1429    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1430        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1431
1432    cov = np.zeros((length, length))
1433    for i in range(length):
1434        for j in range(i, length):
1435            cov[i, j] = _covariance_element(obs[i], obs[j])
1436    cov = cov + cov.T - np.diag(np.diag(cov))
1437
1438    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1439
1440    if isinstance(smooth, int):
1441        corr = _smooth_eigenvalues(corr, smooth)
1442
1443    if visualize:
1444        plt.matshow(corr, vmin=-1, vmax=1)
1445        plt.set_cmap('RdBu')
1446        plt.colorbar()
1447        plt.draw()
1448
1449    if correlation is True:
1450        return corr
1451
1452    errors = [o.dvalue for o in obs]
1453    cov = np.diag(errors) @ corr @ np.diag(errors)
1454
1455    eigenvalues = np.linalg.eigh(cov)[0]
1456    if not np.all(eigenvalues >= 0):
1457        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1458
1459    return cov
1460
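A usage sketch, reusing the two correlated observables from the correlate example above; the gamma method has to be applied to every entry of the list first:

    obs_a.gamma_method()
    obs_b.gamma_method()
    cov = covariance([obs_a, obs_b])                     # 2x2 error covariance matrix
    corr = covariance([obs_a, obs_b], correlation=True)  # normalized to unit diagonal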
1461
1462def _smooth_eigenvalues(corr, E):
1463    """Eigenvalue smoothing as described in hep-lat/9412087
1464
1465    corr : np.ndarray
1466        correlation matrix
1467    E : integer
1468        Number of eigenvalues to be left substantially unchanged
1469    """
1470    if not (2 < E < corr.shape[0] - 1):
1471        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1472    vals, vec = np.linalg.eigh(corr)
1473    lambda_min = np.mean(vals[:-E])
1474    vals[vals < lambda_min] = lambda_min
1475    vals /= np.mean(vals)
1476    return vec @ np.diag(vals) @ vec.T
1477
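The effect of the smoothing can be illustrated on a toy correlation matrix (all names and numbers here are arbitrary):

    rng = np.random.default_rng(3)
    toy_corr = np.corrcoef(rng.normal(size=(6, 100)))  # 6x6 toy correlation matrix
    smoothed = _smooth_eigenvalues(toy_corr, E=3)
    # eigenvalues below the mean of the 6 - 3 = 3 smallest are lifted to that mean,
    # afterwards all eigenvalues are rescaled such that their mean stays 1
    print(np.linalg.eigvalsh(toy_corr), np.linalg.eigvalsh(smoothed))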
1478
1479def _covariance_element(obs1, obs2):
1480    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1481
1482    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1483        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1484        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1485        return np.sum(deltas1 * deltas2)
1486
1487    if set(obs1.names).isdisjoint(set(obs2.names)):
1488        return 0.0
1489
1490    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1491        raise Exception('The gamma method has to be applied to both Obs first.')
1492
1493    dvalue = 0.0
1494
1495    for e_name in obs1.mc_names:
1496
1497        if e_name not in obs2.mc_names:
1498            continue
1499
1500        idl_d = {}
1501        for r_name in obs1.e_content[e_name]:
1502            if r_name not in obs2.e_content[e_name]:
1503                continue
1504            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1505
1506        gamma = 0.0
1507
1508        for r_name in obs1.e_content[e_name]:
1509            if r_name not in obs2.e_content[e_name]:
1510                continue
1511            if len(idl_d[r_name]) == 0:
1512                continue
1513            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1514
1515        if gamma == 0.0:
1516            continue
1517
1518        gamma_div = 0.0
1519        for r_name in obs1.e_content[e_name]:
1520            if r_name not in obs2.e_content[e_name]:
1521                continue
1522            if len(idl_d[r_name]) == 0:
1523                continue
1524            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1525        gamma /= gamma_div
1526
1527        dvalue += gamma
1528
1529    for e_name in obs1.cov_names:
1530
1531        if e_name not in obs2.cov_names:
1532            continue
1533
1534        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1535
1536    return dvalue
1537
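Schematically, for every ensemble shared by both observables the loops above accumulate the normalized zero-lag correlation of the fluctuations on the intersection of the two sets of configurations,
$$\rho_{12}=\frac{\sum_s\delta_1^s\delta_2^s}{\sqrt{\sum_s\delta_1^s\delta_1^s\,\sum_s\delta_2^s\delta_2^s}}\,,$$
plus the corresponding covobs contributions. covariance then rescales these entries by the full errors, which carry the gamma-method estimates of the autocorrelations.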
1538
1539def import_jackknife(jacks, name, idl=None):
1540    """Imports jackknife samples and returns an Obs
1541
1542    Parameters
1543    ----------
1544    jacks : numpy.ndarray
1545        numpy array containing the mean value as zeroth entry and
1546        the N jackknife samples as first to Nth entry.
1547    name : str
1548        name of the ensemble the samples are defined on.
1549    """
1550    length = len(jacks) - 1
1551    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1552    samples = jacks[1:] @ prj
1553    mean = np.mean(samples)
1554    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1555    new_obs._value = jacks[0]
1556    return new_obs
1557
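A round trip together with Obs.export_jackknife; 'ensA' is an arbitrary name and the optional idl argument can be used to place the samples on a specific set of configurations:

    my_obs = Obs([np.random.default_rng(5).normal(1.0, 0.2, 500)], ['ensA'])
    jacks = my_obs.export_jackknife()
    reconstructed = import_jackknife(jacks, 'ensA')
    # the round trip reproduces both the value and the fluctuations of my_obs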
1558
1559def merge_obs(list_of_obs):
1560    """Combine all observables in list_of_obs into one new observable
1561
1562    Parameters
1563    ----------
1564    list_of_obs : list
1565        list of the Obs object to be combined
1566
1567    Notes
1568    -----
1569    It is not possible to combine obs which are based on the same replicum
1570    """
1571    replist = [item for obs in list_of_obs for item in obs.names]
1572    if len(replist) != len(set(replist)):
1573        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1574    if any([len(o.cov_names) for o in list_of_obs]):
1575        raise Exception('Not possible to merge data that contains covobs!')
1576    new_dict = {}
1577    idl_dict = {}
1578    for o in list_of_obs:
1579        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1580                        for key in set(o.deltas) | set(o.r_values)})
1581        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1582
1583    names = sorted(new_dict.keys())
1584    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1585    o.reweighted = any(oi.reweighted for oi in list_of_obs)
1586    return o
1587
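A usage sketch for two replica of the same ensemble; the replica names follow the 'ensemble|replicum' convention that e_content relies on:

    rng = np.random.default_rng(2)
    obs_r1 = Obs([rng.normal(1.0, 0.2, 400)], ['ensA|r01'])
    obs_r2 = Obs([rng.normal(1.0, 0.2, 600)], ['ensA|r02'])
    combined = merge_obs([obs_r1, obs_r2])  # one Obs carrying both replica
    combined.gamma_method()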
1588
1589def cov_Obs(means, cov, name, grad=None):
1590    """Create an Obs based on mean(s) and a covariance matrix
1591
1592    Parameters
1593    ----------
1594    means : list of floats or float
1595        N mean value(s) of the new Obs
1596    cov : list or array
1597        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1598    name : str
1599        identifier for the covariance matrix
1600    grad : list or array
1601        Gradient of the Covobs with respect to the means belonging to cov.
1602    """
1603
1604    def covobs_to_obs(co):
1605        """Make an Obs out of a Covobs
1606
1607        Parameters
1608        ----------
1609        co : Covobs
1610            Covobs to be embedded into the Obs
1611        """
1612        o = Obs([], [], means=[])
1613        o._value = co.value
1614        o.names.append(co.name)
1615        o._covobs[co.name] = co
1616        o._dvalue = np.sqrt(co.errsq())
1617        return o
1618
1619    ol = []
1620    if isinstance(means, (float, int)):
1621        means = [means]
1622
1623    for i in range(len(means)):
1624        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1625    if ol[0].covobs[name].N != len(means):
1626        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1627    if len(ol) == 1:
1628        return ol[0]
1629    return ol
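A usage sketch: one Obs from a single systematic uncertainty and a pair of correlated ones from a 2x2 covariance matrix; the identifiers 'Z_m' and 'syst' are arbitrary:

    r_mass = cov_Obs(0.5, 0.02 ** 2, 'Z_m')  # value 0.5 with error 0.02
    pair = cov_Obs([1.0, 2.0],
                   [[0.010, 0.002],
                    [0.002, 0.040]], 'syst')  # list of two correlated Obs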
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
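A minimal construction sketch (the data and the ensemble name 'ensemble_A' are hypothetical; each sample array needs at least 5 entries):

    import numpy as np
    import pyerrors as pe

    # synthetic Monte Carlo chain of 1000 measurements (hypothetical data)
    samples = np.random.normal(1.0, 0.1, 1000)
    obs = pe.Obs([samples], ['ensemble_A'])

If idl is omitted, the samples are assumed to live on the contiguous configuration range 1 to N.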
def gamma_method(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimate coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
294                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
295
296            _compute_drho(gapsize)
297            if self.tau_exp[e_name] > 0:
298                texp = self.tau_exp[e_name]
299                # Critical slowing down analysis
300                if w_max // 2 <= 1:
301                    raise Exception("Need at least 8 samples for tau_exp error analysis")
302                for n in range(gapsize, w_max // 2, gapsize):
303                    _compute_drho(n + gapsize)
304                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
305                        # Bias correction hep-lat/0306017 eq. (49) included
306                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
307                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
308                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
309                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
310                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
311                        self.e_windowsize[e_name] = n
312                        break
313            else:
314                if self.S[e_name] == 0.0:
315                    self.e_tauint[e_name] = 0.5
316                    self.e_dtauint[e_name] = 0.0
317                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
318                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
319                    self.e_windowsize[e_name] = 0
320                else:
321                    # Standard automatic windowing procedure
322                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
323                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
324                    for n in range(1, w_max):
325                        if n < w_max // 2 - 2:
326                            _compute_drho(gapsize * n + gapsize)
327                        if g_w[n - 1] < 0 or n >= w_max - 1:
328                            n *= gapsize
329                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
330                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
331                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
332                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
333                            self.e_windowsize[e_name] = n
334                            break
335
336            self._dvalue += self.e_dvalue[e_name] ** 2
337            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
338
339        for e_name in self.cov_names:
340            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
341            self.e_ddvalue[e_name] = 0
342            self._dvalue += self.e_dvalue[e_name]**2
343
344        self._dvalue = np.sqrt(self._dvalue)
345        if self._dvalue == 0.0:
346            self.ddvalue = 0.0
347        else:
348            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
349        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
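A short usage sketch for the hypothetical observable obs constructed above:

    obs.gamma_method()             # automatic windowing with the default S=2.0
    print(obs.value, obs.dvalue)   # mean value and its statistical error
    obs.gamma_method(S=0)          # assume no autocorrelation: plain standard error
    obs.gamma_method(tau_exp=5.0, N_sigma=2)  # attach an exponential tail to rho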
def gm(self, **kwargs):
Alias of gamma_method. Source, parameters and behavior are identical to gamma_method documented above.
def details(self, ens_content=True):
386    def details(self, ens_content=True):
387        """Output detailed properties of the Obs.
388
389        Parameters
390        ----------
391        ens_content : bool
392            print details about the ensembles and replica if true.
393        """
394        if self.tag is not None:
395            print("Description:", self.tag)
396        if not hasattr(self, 'e_dvalue'):
397            print('Result\t %3.8e' % (self.value))
398        else:
399            if self.value == 0.0:
400                percentage = np.nan
401            else:
402                percentage = np.abs(self._dvalue / self.value) * 100
403            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
404            if len(self.e_names) > 1:
405                print(' Ensemble errors:')
406            e_content = self.e_content
407            for e_name in self.mc_names:
408                if isinstance(self.idl[e_content[e_name][0]], range):
409                    gap = self.idl[e_content[e_name][0]].step
410                else:
411                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
412
413                if len(self.e_names) > 1:
414                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
415                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
416                tau_string += f" in units of {gap} config"
417                if gap > 1:
418                    tau_string += "s"
419                if self.tau_exp[e_name] > 0:
420                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
421                else:
422                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
423                print(tau_string)
424            for e_name in self.cov_names:
425                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
426        if ens_content is True:
427            if len(self.e_names) == 1:
428                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
429            else:
430                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
431            my_string_list = []
432            for key, value in sorted(self.e_content.items()):
433                if key not in self.covobs:
434                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
435                    if len(value) == 1:
436                        my_string += f': {self.shape[value[0]]} configurations'
437                        if isinstance(self.idl[value[0]], range):
438                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
439                        else:
440                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
441                    else:
442                        sublist = []
443                        for v in value:
444                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
445                            my_substring += f': {self.shape[v]} configurations'
446                            if isinstance(self.idl[v], range):
447                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
448                            else:
449                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
450                            sublist.append(my_substring)
451
452                        my_string += '\n' + '\n'.join(sublist)
453                else:
454                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
455                my_string_list.append(my_string)
456            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
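For example, with obs from the sketches above and the gamma method already run:

    obs.gamma_method()
    obs.details()                    # result, error, tau_int and ensemble content
    obs.details(ens_content=False)   # suppress the per-ensemble listing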
def reweight(self, weight):
458    def reweight(self, weight):
459        """Reweight the obs with the given reweighting factor.
460
461        Parameters
462        ----------
463        weight : Obs
464            Reweighting factor. An Observable that has to be defined on a superset of the
465            configurations in obs[i].idl for all i.
466        all_configs : bool
467            if True, the reweighted observables are normalized by the average of
468            the reweighting factor on all configurations in weight.idl and not
469            on the configurations in obs[i].idl. Default False.
470        """
471        return reweight(weight, [self])[0]

Reweight the obs with the given reweighting factor.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
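A minimal sketch, assuming w_samples holds a reweighting factor measured on the same hypothetical ensemble as obs:

    w = pe.Obs([w_samples], ['ensemble_A'])   # hypothetical reweighting factor
    obs_rw = obs.reweight(w)                  # equivalent to reweight(w, [obs])[0]
    obs_rw.gamma_method()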
def is_zero_within_error(self, sigma=1):
473    def is_zero_within_error(self, sigma=1):
474        """Checks whether the observable is zero within 'sigma' standard errors.
475
476        Parameters
477        ----------
478        sigma : int
479            Number of standard errors used for the check.
480
481        Only works properly if the gamma method was run beforehand.
482        """
483        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.

Only works properly if the gamma method was run beforehand.
def is_zero(self, atol=1e-10):
485    def is_zero(self, atol=1e-10):
486        """Checks whether the observable is zero within a given tolerance.
487
488        Parameters
489        ----------
490        atol : float
491            Absolute tolerance (for details see numpy documentation).
492        """
493        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
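Both checks in one short sketch (obs as above):

    diff = obs - obs
    print(diff.is_zero())            # True: value and all deltas vanish identically
    obs.gamma_method()               # required before is_zero_within_error
    print(obs.is_zero_within_error(sigma=2))   # is |value| <= 2 * dvalue?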
def plot_tauint(self, save=None):
495    def plot_tauint(self, save=None):
496        """Plot integrated autocorrelation time for each ensemble.
497
498        Parameters
499        ----------
500        save : str
501            saves the figure to a file named 'save' if set.
502        """
503        if not hasattr(self, 'e_dvalue'):
504            raise Exception('Run the gamma method first.')
505
506        for e, e_name in enumerate(self.mc_names):
507            fig = plt.figure()
508            plt.xlabel(r'$W$')
509            plt.ylabel(r'$\tau_\mathrm{int}$')
510            length = int(len(self.e_n_tauint[e_name]))
511            if self.tau_exp[e_name] > 0:
512                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
513                x_help = np.arange(2 * self.tau_exp[e_name])
514                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
515                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
516                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
517                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
518                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
519                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
520                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
521            else:
522                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
523                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
524
525            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
526            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
527            plt.legend()
528            plt.xlim(-0.5, xmax)
529            ylim = plt.ylim()
530            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
531            plt.draw()
532            if save:
533                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
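For example (obs as above; the gamma method has to be run first):

    obs.gamma_method()
    obs.plot_tauint()                # tau_int as a function of the window size W
    obs.plot_tauint(save='tauint')   # additionally saves one figure per ensemble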
def plot_rho(self, save=None):
535    def plot_rho(self, save=None):
536        """Plot normalized autocorrelation function for each ensemble.
537
538        Parameters
539        ----------
540        save : str
541            saves the figure to a file named 'save' if set.
542        """
543        if not hasattr(self, 'e_dvalue'):
544            raise Exception('Run the gamma method first.')
545        for e, e_name in enumerate(self.mc_names):
546            fig = plt.figure()
547            plt.xlabel('W')
548            plt.ylabel('rho')
549            length = int(len(self.e_drho[e_name]))
550            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
551            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
552            if self.tau_exp[e_name] > 0:
553                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
554                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
555                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
556                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
557            else:
558                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
559                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
560            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
561            plt.xlim(-0.5, xmax)
562            plt.draw()
563            if save:
564                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def plot_rep_dist(self):
566    def plot_rep_dist(self):
567        """Plot replica distribution for each ensemble with more than one replicum."""
568        if not hasattr(self, 'e_dvalue'):
569            raise Exception('Run the gamma method first.')
570        for e, e_name in enumerate(self.mc_names):
571            if len(self.e_content[e_name]) == 1:
572                print('No replica distribution for a single replicum (', e_name, ')')
573                continue
574            r_length = []
575            sub_r_mean = 0
576            for r, r_name in enumerate(self.e_content[e_name]):
577                r_length.append(len(self.deltas[r_name]))
578                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
579            e_N = np.sum(r_length)
580            sub_r_mean /= e_N
581            arr = np.zeros(len(self.e_content[e_name]))
582            for r, r_name in enumerate(self.e_content[e_name]):
583                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
584            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
585            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
586            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
588    def plot_history(self, expand=True):
589        """Plot derived Monte Carlo history for each ensemble.
590
591        Parameters
592        ----------
593        expand : bool
594            show expanded history for irregular Monte Carlo chains (default: True).
595        """
596        for e, e_name in enumerate(self.mc_names):
597            plt.figure()
598            r_length = []
599            tmp = []
600            tmp_expanded = []
601            for r, r_name in enumerate(self.e_content[e_name]):
602                tmp.append(self.deltas[r_name] + self.r_values[r_name])
603                if expand:
604                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
605                    r_length.append(len(tmp_expanded[-1]))
606                else:
607                    r_length.append(len(tmp[-1]))
608            e_N = np.sum(r_length)
609            x = np.arange(e_N)
610            y_test = np.concatenate(tmp, axis=0)
611            if expand:
612                y = np.concatenate(tmp_expanded, axis=0)
613            else:
614                y = y_test
615            plt.errorbar(x, y, fmt='.', markersize=3)
616            plt.xlim(-0.5, e_N - 0.5)
617            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
618            plt.draw()

Plot derived Monte Carlo history for each ensemble.

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
620    def plot_piechart(self, save=None):
621        """Plot piechart which shows the fractional contribution of each
622        ensemble to the error and returns a dictionary containing the fractions.
623
624        Parameters
625        ----------
626        save : str
627            saves the figure to a file named 'save' if set.
628        """
629        if not hasattr(self, 'e_dvalue'):
630            raise Exception('Run the gamma method first.')
631        if np.isclose(0.0, self._dvalue, atol=1e-15):
632            raise Exception('Error is 0.0')
633        labels = self.e_names
634        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
635        fig1, ax1 = plt.subplots()
636        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
637        ax1.axis('equal')
638        plt.draw()
639        if save:
640            fig1.savefig(save)
641
642        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
644    def dump(self, filename, datatype="json.gz", description="", **kwargs):
645        """Dump the Obs to a file 'name' of chosen format.
646
647        Parameters
648        ----------
649        filename : str
650            name of the file to be saved.
651        datatype : str
652            Format of the exported file. Supported formats include
653            "json.gz" and "pickle"
654        description : str
655            Description for output file, only relevant for json.gz format.
656        path : str
657            specifies a custom path for the file (default '.')
658        """
659        if 'path' in kwargs:
660            file_name = kwargs.get('path') + '/' + filename
661        else:
662            file_name = filename
663
664        if datatype == "json.gz":
665            from .input.json import dump_to_json
666            dump_to_json([self], file_name, description=description)
667        elif datatype == "pickle":
668            with open(file_name + '.p', 'wb') as fb:
669                pickle.dump(self, fb)
670        else:
671            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
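For example (file names are hypothetical):

    obs.dump('my_obs')                               # json.gz format by default
    obs.dump('my_obs', datatype='pickle', path='.')  # writes ./my_obs.p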
def export_jackknife(self):
673    def export_jackknife(self):
674        """Export jackknife samples from the Obs.
675
676        Returns
677        -------
678        numpy.ndarray
679            Returns a numpy array of length N + 1 where N is the number of samples
680            for the given ensemble and replicum. The zeroth entry of the array contains
681            the mean value of the Obs, entries 1 to N contain the N jackknife samples
682            derived from the Obs. The current implementation only works for observables
683            defined on exactly one ensemble and replicum. The derived jackknife samples
684            should agree with samples from a full jackknife analysis up to O(1/N).
685        """
686
687        if len(self.names) != 1:
688            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
689
690        name = self.names[0]
691        full_data = self.deltas[name] + self.r_values[name]
692        n = full_data.size
693        mean = self.value
694        tmp_jacks = np.zeros(n + 1)
695        tmp_jacks[0] = mean
696        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
697        return tmp_jacks

Export jackknife samples from the Obs.

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
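As a sketch, the exported samples can be fed into a standard delete-one jackknife error estimate; for uncorrelated data this should be comparable to the S=0 error from the gamma method:

    jacks = obs.export_jackknife()
    n = len(jacks) - 1
    theta = jacks[1:]                # delete-one jackknife means
    err_jack = np.sqrt((n - 1) * np.mean((theta - theta.mean()) ** 2))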
def sqrt(self):
825    def sqrt(self):
826        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
828    def log(self):
829        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
831    def exp(self):
832        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
834    def sin(self):
835        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
837    def cos(self):
838        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
840    def tan(self):
841        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
843    def arcsin(self):
844        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
846    def arccos(self):
847        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
849    def arctan(self):
850        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
852    def sinh(self):
853        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
855    def cosh(self):
856        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
858    def tanh(self):
859        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
861    def arcsinh(self):
862        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
864    def arccosh(self):
865        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
867    def arctanh(self):
868        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
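Since these methods mirror the corresponding numpy functions, numpy ufuncs can be applied to Obs objects directly; a short sketch (obs as above):

    res = np.sin(obs) * np.exp(obs)   # dispatches to Obs.sin / Obs.exp
    res.gamma_method()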
class CObs:
871class CObs:
872    """Class for a complex valued observable."""
873    __slots__ = ['_real', '_imag', 'tag']
874
875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None
879
880    @property
881    def real(self):
882        return self._real
883
884    @property
885    def imag(self):
886        return self._imag
887
888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)
894
895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0
898
899    def conjugate(self):
900        return CObs(self.real, -self.imag)
901
902    def __add__(self, other):
903        if isinstance(other, np.ndarray):
904            return other + self
905        elif hasattr(other, 'real') and hasattr(other, 'imag'):
906            return CObs(self.real + other.real,
907                        self.imag + other.imag)
908        else:
909            return CObs(self.real + other, self.imag)
910
911    def __radd__(self, y):
912        return self + y
913
914    def __sub__(self, other):
915        if isinstance(other, np.ndarray):
916            return -1 * (other - self)
917        elif hasattr(other, 'real') and hasattr(other, 'imag'):
918            return CObs(self.real - other.real, self.imag - other.imag)
919        else:
920            return CObs(self.real - other, self.imag)
921
922    def __rsub__(self, other):
923        return -1 * (self - other)
924
925    def __mul__(self, other):
926        if isinstance(other, np.ndarray):
927            return other * self
928        elif hasattr(other, 'real') and hasattr(other, 'imag'):
929            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
930                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
931                                               [self.real, other.real, self.imag, other.imag],
932                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
933                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
934                                               [self.real, other.real, self.imag, other.imag],
935                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
936            elif getattr(other, 'imag', 0) != 0:
937                return CObs(self.real * other.real - self.imag * other.imag,
938                            self.imag * other.real + self.real * other.imag)
939            else:
940                return CObs(self.real * other.real, self.imag * other.real)
941        else:
942            return CObs(self.real * other, self.imag * other)
943
944    def __rmul__(self, other):
945        return self * other
946
947    def __truediv__(self, other):
948        if isinstance(other, np.ndarray):
949            return 1 / (other / self)
950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
951            r = other.real ** 2 + other.imag ** 2
952            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
953        else:
954            return CObs(self.real / other, self.imag / other)
955
956    def __rtruediv__(self, other):
957        r = self.real ** 2 + self.imag ** 2
958        if hasattr(other, 'real') and hasattr(other, 'imag'):
959            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
960        else:
961            return CObs(self.real * other / r, -self.imag * other / r)
962
963    def __abs__(self):
964        return np.sqrt(self.real**2 + self.imag**2)
965
966    def __pos__(self):
967        return self
968
969    def __neg__(self):
970        return -1 * self
971
972    def __eq__(self, other):
973        return self.real == other.real and self.imag == other.imag
974
975    def __str__(self):
976        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
977
978    def __repr__(self):
979        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None
def gamma_method(self, **kwargs):
888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
899    def conjugate(self):
900        return CObs(self.real, -self.imag)
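A construction sketch, assuming re_samples and im_samples are real-valued sample arrays on the same hypothetical ensemble:

    c_obs = pe.CObs(pe.Obs([re_samples], ['ensemble_A']),
                    pe.Obs([im_samples], ['ensemble_A']))
    c_obs.gamma_method()                    # runs the gamma method on both parts
    print(c_obs.real.dvalue, abs(c_obs))    # abs returns sqrt(real**2 + imag**2)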
def derived_observable(func, data, array_mode=False, **kwargs):
1104def derived_observable(func, data, array_mode=False, **kwargs):
1105    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1106
1107    Parameters
1108    ----------
1109    func : object
1110        arbitrary function of the form func(data, **kwargs). For the
1111        automatic differentiation to work, all numpy functions have to have
1112        the autograd wrapper (use 'import autograd.numpy as anp').
1113    data : list
1114        list of Obs, e.g. [obs1, obs2, obs3].
1115    num_grad : bool
1116        if True, numerical derivatives are used instead of autograd
1117        (default False). To control the numerical differentiation the
1118        kwargs of numdifftools.step_generators.MaxStepGenerator
1119        can be used.
1120    man_grad : list
1121        manually supply a list or an array which contains the jacobian
1122        of func. Use cautiously; supplying the wrong derivative will
1123        not be intercepted.
1124
1125    Notes
1126    -----
1127    For simple mathematical operations it can be practical to use anonymous
1128    functions. For the ratio of two observables one can e.g. use
1129
1130    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1131    """
1132
1133    data = np.asarray(data)
1134    raveled_data = data.ravel()
1135
1136    # Workaround for matrix operations containing non Obs data
1137    if not all(isinstance(x, Obs) for x in raveled_data):
1138        for i in range(len(raveled_data)):
1139            if isinstance(raveled_data[i], (int, float)):
1140                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1141
1142    allcov = {}
1143    for o in raveled_data:
1144        for name in o.cov_names:
1145            if name in allcov:
1146                if not np.allclose(allcov[name], o.covobs[name].cov):
1147                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1148            else:
1149                allcov[name] = o.covobs[name].cov
1150
1151    n_obs = len(raveled_data)
1152    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1153    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1154    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1155
1156    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1157
1158    if data.ndim == 1:
1159        values = np.array([o.value for o in data])
1160    else:
1161        values = np.vectorize(lambda x: x.value)(data)
1162
1163    new_values = func(values, **kwargs)
1164
1165    multi = int(isinstance(new_values, np.ndarray))
1166
1167    new_r_values = {}
1168    new_idl_d = {}
1169    for name in new_sample_names:
1170        idl = []
1171        tmp_values = np.zeros(n_obs)
1172        for i, item in enumerate(raveled_data):
1173            tmp_values[i] = item.r_values.get(name, item.value)
1174            tmp_idl = item.idl.get(name)
1175            if tmp_idl is not None:
1176                idl.append(tmp_idl)
1177        if multi > 0:
1178            tmp_values = np.array(tmp_values).reshape(data.shape)
1179        new_r_values[name] = func(tmp_values, **kwargs)
1180        new_idl_d[name] = _merge_idx(idl)
1181
1182    if 'man_grad' in kwargs:
1183        deriv = np.asarray(kwargs.get('man_grad'))
1184        if new_values.shape + data.shape != deriv.shape:
1185            raise Exception('Manual derivative does not have correct shape.')
1186    elif kwargs.get('num_grad') is True:
1187        if multi > 0:
1188            raise Exception('Multi mode currently not supported for numerical derivative')
1189        options = {
1190            'base_step': 0.1,
1191            'step_ratio': 2.5}
1192        for key in options.keys():
1193            kwarg = kwargs.get(key)
1194            if kwarg is not None:
1195                options[key] = kwarg
1196        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1197        if tmp_df.size == 1:
1198            deriv = np.array([tmp_df.real])
1199        else:
1200            deriv = tmp_df.real
1201    else:
1202        deriv = jacobian(func)(values, **kwargs)
1203
1204    final_result = np.zeros(new_values.shape, dtype=object)
1205
1206    if array_mode is True:
1207
1208        class _Zero_grad():
1209            def __init__(self, N):
1210                self.grad = np.zeros((N, 1))
1211
1212        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1213        d_extracted = {}
1214        g_extracted = {}
1215        for name in new_sample_names:
1216            d_extracted[name] = []
1217            ens_length = len(new_idl_d[name])
1218            for i_dat, dat in enumerate(data):
1219                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1220        for name in new_cov_names:
1221            g_extracted[name] = []
1222            zero_grad = _Zero_grad(new_covobs_lengths[name])
1223            for i_dat, dat in enumerate(data):
1224                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1225
1226    for i_val, new_val in np.ndenumerate(new_values):
1227        new_deltas = {}
1228        new_grad = {}
1229        if array_mode is True:
1230            for name in new_sample_names:
1231                ens_length = d_extracted[name][0].shape[-1]
1232                new_deltas[name] = np.zeros(ens_length)
1233                for i_dat, dat in enumerate(d_extracted[name]):
1234                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1235            for name in new_cov_names:
1236                new_grad[name] = 0
1237                for i_dat, dat in enumerate(g_extracted[name]):
1238                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1239        else:
1240            for j_obs, obs in np.ndenumerate(data):
1241                for name in obs.names:
1242                    if name in obs.cov_names:
1243                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1244                    else:
1245                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1246
1247        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1248
1249        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1250            raise Exception('The same name has been used for deltas and covobs!')
1251        new_samples = []
1252        new_means = []
1253        new_idl = []
1254        new_names_obs = []
1255        for name in new_names:
1256            if name not in new_covobs:
1257                new_samples.append(new_deltas[name])
1258                new_idl.append(new_idl_d[name])
1259                new_means.append(new_r_values[name][i_val])
1260                new_names_obs.append(name)
1261        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1262        for name in new_covobs:
1263            final_result[i_val].names.append(name)
1264        final_result[i_val]._covobs = new_covobs
1265        final_result[i_val]._value = new_val
1266        final_result[i_val].reweighted = reweighted
1267
1268    if multi == 0:
1269        final_result = final_result.item()
1270
1271    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array containing the Jacobian of func. Use with caution; supplying a wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
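
The derivative mode can be chosen in the same call. A minimal sketch, assuming obs1 and obs2 are existing Obs instances and that the functions are accessible at the package top level:

import autograd.numpy as anp  # autograd wrapper, required for automatic differentiation
import pyerrors as pe

# Default: automatic differentiation; numpy calls inside func must go through anp
log_ratio = pe.derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2])

# Numerical derivatives via numdifftools instead of autograd
log_ratio_num = pe.derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2], num_grad=True)

# Manually supplied Jacobian of the ratio: d(x0/x1)/dx0 = 1/x1, d(x0/x1)/dx1 = -x0/x1**2
ratio = pe.derived_observable(lambda x: x[0] / x[1], [obs1, obs2],
                              man_grad=[1 / obs2.value, -obs1.value / obs2.value ** 2])

log_ratio.gamma_method()  # error propagation works as for any other Obs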

def reweight(weight, obs, **kwargs):
1308def reweight(weight, obs, **kwargs):
1309    """Reweight a list of observables.
1310
1311    Parameters
1312    ----------
1313    weight : Obs
1314        Reweighting factor. An Observable that has to be defined on a superset of the
1315        configurations in obs[i].idl for all i.
1316    obs : list
1317        list of Obs, e.g. [obs1, obs2, obs3].
1318    all_configs : bool
1319        if True, the reweighted observables are normalized by the average of
1320        the reweighting factor on all configurations in weight.idl and not
1321        on the configurations in obs[i].idl. Default False.
1322    """
1323    result = []
1324    for i in range(len(obs)):
1325        if len(obs[i].cov_names):
1326            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1327        if not set(obs[i].names).issubset(weight.names):
1328            raise Exception('Error: Ensembles do not fit')
1329        for name in obs[i].names:
1330            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1331                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1332        new_samples = []
1333        w_deltas = {}
1334        for name in sorted(obs[i].names):
1335            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1336            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1337        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1338
1339        if kwargs.get('all_configs'):
1340            new_weight = weight
1341        else:
1342            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1343
1344        result.append(tmp_obs / new_weight)
1345        result[-1].reweighted = True
1346
1347    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
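
A minimal usage sketch; the samples, ensemble name and seed below are made up for illustration:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(42)
# Reweighting factor and observable measured on the same configurations
weight = pe.Obs([rng.normal(1.0, 0.05, 1000)], ['ens1'])
my_obs = pe.Obs([rng.normal(0.7, 0.2, 1000)], ['ens1'])

r_obs = pe.reweight(weight, [my_obs])[0]  # estimates <w*O>/<w>
r_obs.gamma_method()
print(r_obs.reweighted)  # True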
def correlate(obs_a, obs_b):
1350def correlate(obs_a, obs_b):
1351    """Correlate two observables.
1352
1353    Parameters
1354    ----------
1355    obs_a : Obs
1356        First observable
1357    obs_b : Obs
1358        Second observable
1359
1360    Notes
1361    -----
1362    Keep in mind that only primary observables which have not been reweighted
1363    should be correlated; the reweighting has to be applied after correlating.
1364    Currently this only works if the ensembles are identical (which is not strictly necessary).
1365    """
1366
1367    if sorted(obs_a.names) != sorted(obs_b.names):
1368        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1369    if len(obs_a.cov_names) or len(obs_b.cov_names):
1370        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1371    for name in obs_a.names:
1372        if obs_a.shape[name] != obs_b.shape[name]:
1373            raise Exception(f'Shapes of ensemble {name} do not fit')
1374        if obs_a.idl[name] != obs_b.idl[name]:
1375            raise Exception(f'idl of ensemble {name} does not fit')
1376
1377    if obs_a.reweighted is True:
1378        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1379    if obs_b.reweighted is True:
1380        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1381
1382    new_samples = []
1383    new_idl = []
1384    for name in sorted(obs_a.names):
1385        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1386        new_idl.append(obs_a.idl[name])
1387
1388    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1389    o.reweighted = obs_a.reweighted or obs_b.reweighted
1390    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind that only primary observables which have not been reweighted should be correlated; the reweighting has to be applied after correlating the observables. Currently this only works if the ensembles are identical (which is not strictly necessary).
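
A minimal sketch with made-up samples on a common ensemble:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(7)
# Two primary observables on identical configurations (same ensemble name and idl)
obs_a = pe.Obs([rng.normal(1.0, 0.1, 1000)], ['ens1'])
obs_b = pe.Obs([rng.normal(2.0, 0.1, 1000)], ['ens1'])

ab = pe.correlate(obs_a, obs_b)  # sample-wise product, correlations preserved
ab.gamma_method()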

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1393def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1394    r'''Calculates the error covariance matrix of a set of observables.
1395
1396    WARNING: This function should be used with care, especially for observables with support on multiple
1397             ensembles with differing autocorrelations. See the notes below for details.
1398
1399    The gamma method has to be applied first to all observables.
1400
1401    Parameters
1402    ----------
1403    obs : list or numpy.ndarray
1404        List or one dimensional array of Obs
1405    visualize : bool
1406        If True plots the corresponding normalized correlation matrix (default False).
1407    correlation : bool
1408        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1409    smooth : None or int
1410        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1411        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1412        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1413        small ones.
1414
1415    Notes
1416    -----
1417    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1418    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1419    in the absence of autocorrelation.
1420    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1421    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1422    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1423    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1424    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1425    '''
1426
1427    length = len(obs)
1428
1429    max_samples = np.max([o.N for o in obs])
1430    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1431        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank-deficient matrix.", RuntimeWarning)
1432
1433    cov = np.zeros((length, length))
1434    for i in range(length):
1435        for j in range(i, length):
1436            cov[i, j] = _covariance_element(obs[i], obs[j])
1437    cov = cov + cov.T - np.diag(np.diag(cov))
1438
1439    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1440
1441    if isinstance(smooth, int):
1442        corr = _smooth_eigenvalues(corr, smooth)
1443
1444    if visualize:
1445        plt.matshow(corr, vmin=-1, vmax=1)
1446        plt.set_cmap('RdBu')
1447        plt.colorbar()
1448        plt.draw()
1449
1450    if correlation is True:
1451        return corr
1452
1453    errors = [o.dvalue for o in obs]
1454    cov = np.diag(errors) @ corr @ np.diag(errors)
1455
1456    eigenvalues = np.linalg.eigh(cov)[0]
1457    if not np.all(eigenvalues >= 0):
1458        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1459
1460    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.

The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling it by the full errors, including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite,
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0$$
for every $v\in\mathbb{R}^M$, while no such identity holds for larger windows/lags.

For observables defined on a single ensemble this approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element equals the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements,
$$\tau_{\mathrm{int},ij}=\sqrt{\tau_{\mathrm{int},i}\times \tau_{\mathrm{int},j}}\,.$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
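
A minimal usage sketch with two hypothetical observables; note the mandatory gamma_method calls before covariance:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(1)
shared = rng.normal(0.0, 0.1, 1000)  # shared fluctuations induce a correlation
obs1 = pe.Obs([1.0 + shared + rng.normal(0.0, 0.1, 1000)], ['ens1'])
obs2 = pe.Obs([0.5 + shared + rng.normal(0.0, 0.1, 1000)], ['ens1'])
for o in (obs1, obs2):
    o.gamma_method()  # has to be applied first to all observables

cov = pe.covariance([obs1, obs2])                      # 2x2 error covariance matrix
corr = pe.covariance([obs1, obs2], correlation=True)   # normalized correlation matrix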

def import_jackknife(jacks, name, idl=None):
1540def import_jackknife(jacks, name, idl=None):
1541    """Imports jackknife samples and returns an Obs
1542
1543    Parameters
1544    ----------
1545    jacks : numpy.ndarray
1546        numpy array containing the mean value as zeroth entry and
1547        the N jackknife samples as first to Nth entry.
1548    name : str
1549        name of the ensemble the samples are defined on.
1550    """
1551    length = len(jacks) - 1
1552    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1553    samples = jacks[1:] @ prj
1554    mean = np.mean(samples)
1555    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1556    new_obs._value = jacks[0]
1557    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • idl (list, optional): list of ranges or lists on which the samples are defined.
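
The projection matrix prj inverts the jackknife construction: from jackknife means J_j the original samples are recovered as x_j = sum_i J_i - (N-1) J_j. A round-trip sketch, assuming the complementary Obs.export_jackknife method:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(3)
my_obs = pe.Obs([rng.normal(1.0, 0.2, 500)], ['ens1'])

jacks = my_obs.export_jackknife()   # mean as zeroth entry, then the N jackknife samples
rebuilt = pe.import_jackknife(jacks, 'ens1')
rebuilt.gamma_method()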
def merge_obs(list_of_obs):
1560def merge_obs(list_of_obs):
1561    """Combine all observables in list_of_obs into one new observable
1562
1563    Parameters
1564    ----------
1565    list_of_obs : list
1566        list of the Obs object to be combined
1567
1568    Notes
1569    -----
1570    It is not possible to combine obs which are based on the same replicum
1571    """
1572    replist = [item for obs in list_of_obs for item in obs.names]
1573    if (len(replist) == len(set(replist))) is False:
1574        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1575    if any([len(o.cov_names) for o in list_of_obs]):
1576        raise Exception('Not possible to merge data that contains covobs!')
1577    new_dict = {}
1578    idl_dict = {}
1579    for o in list_of_obs:
1580        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1581                        for key in set(o.deltas) | set(o.r_values)})
1582        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1583
1584    names = sorted(new_dict.keys())
1585    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1586    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1587    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum
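
A minimal sketch combining two measurements on disjoint replica (the replica names follow the usual 'ensemble|replicum' pattern and are made up here):

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(5)
rep1 = pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1|r01'])
rep2 = pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1|r02'])

combined = pe.merge_obs([rep1, rep2])  # one Obs defined on both replica
combined.gamma_method()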

def cov_Obs(means, cov, name, grad=None):
1590def cov_Obs(means, cov, name, grad=None):
1591    """Create an Obs based on mean(s) and a covariance matrix
1592
1593    Parameters
1594    ----------
1595    means : list of floats or float
1596        N mean value(s) of the new Obs
1597    cov : list or array
1598        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1599    name : str
1600        identifier for the covariance matrix
1601    grad : list or array
1602        Gradient of the Covobs wrt. the means belonging to cov.
1603    """
1604
1605    def covobs_to_obs(co):
1606        """Make an Obs out of a Covobs
1607
1608        Parameters
1609        ----------
1610        co : Covobs
1611            Covobs to be embedded into the Obs
1612        """
1613        o = Obs([], [], means=[])
1614        o._value = co.value
1615        o.names.append(co.name)
1616        o._covobs[co.name] = co
1617        o._dvalue = np.sqrt(co.errsq())
1618        return o
1619
1620    ol = []
1621    if isinstance(means, (float, int)):
1622        means = [means]
1623
1624    for i in range(len(means)):
1625        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1626    if ol[0].covobs[name].N != len(means):
1627        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1628    if len(ol) == 1:
1629        return ol[0]
1630    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
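
A minimal sketch of both call signatures; the values and the identifier are placeholders:

import pyerrors as pe

# Single external input: value 0.9148 with standard error 0.0011 (the variance enters)
renorm = pe.cov_Obs(0.9148, 0.0011 ** 2, 'renormalization_factor')

# Two correlated inputs from a 2x2 covariance matrix; returns a list of two Obs
o1, o2 = pe.cov_Obs([1.0, 0.5], [[4e-4, 1e-4], [1e-4, 9e-4]], 'external_input')

renorm.gamma_method()  # the error is fully specified by cov; gamma_method just finalizes it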