pyerrors.obs

import warnings
import hashlib
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of the same length which
    contains the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
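
    Examples
    --------
    A minimal sketch of how these class attributes set default analysis
    parameters; the ensemble name is hypothetical:

    >>> Obs.S_global = 2.5                # default S for all ensembles
    >>> Obs.S_dict['ensemble1'] = 3.0     # override S for one ensemble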
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
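
        Examples
        --------
        A minimal sketch with randomly generated (hypothetical) sample data:

        >>> import numpy as np
        >>> samples = np.random.normal(1.0, 0.1, 500)
        >>> my_obs = Obs([samples], ['ensemble1'])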
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
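
        Examples
        --------
        A minimal sketch; `my_obs` is a hypothetical Obs instance:

        >>> my_obs.gamma_method()           # automatic windowing with the default S=2.0
        >>> my_obs.gamma_method(S=3.0)      # larger summation window
        >>> my_obs.gamma_method(tau_exp=5)  # critical slowing down analysis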
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger than or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 <= 0 else 2 * (i - w_max // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method

    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if True.
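
        Examples
        --------
        A minimal sketch; `my_obs` is a hypothetical Obs whose error has
        already been estimated:

        >>> my_obs.gamma_method()
        >>> my_obs.details()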
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the Obs with a given reweighting factor.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly after the gamma method has been run.
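
        Examples
        --------
        A minimal sketch; `my_obs` is a hypothetical Obs:

        >>> my_obs.gamma_method()
        >>> my_obs.is_zero_within_error(sigma=2)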
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if given.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if given.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble.

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot a pie chart which shows the fractional contribution of each
        ensemble to the total error and return a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if given.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'filename' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
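
        Examples
        --------
        A minimal sketch; `my_obs` is a hypothetical Obs and the file name
        is made up (the format-specific extension is appended automatically):

        >>> my_obs.dump("my_obs")                     # json.gz format
        >>> my_obs.dump("my_obs", datatype="pickle")  # pickle format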
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
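
        Examples
        --------
        A sketch of a standard jackknife error estimate built from the
        exported samples; `my_obs` is a hypothetical Obs defined on a
        single ensemble and replicum:

        >>> jacks = my_obs.export_jackknife()
        >>> n = len(jacks) - 1
        >>> error = np.sqrt((n - 1) / n * np.sum((jacks[1:] - np.mean(jacks[1:])) ** 2))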
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __format__(self, format_type):
        my_str = _format_uncertainty(self.value, self._dvalue,
                                     significance=int(float(format_type.replace("+", "").replace("-", ""))))
        for char in ["+", " "]:
            if format_type.startswith(char):
                if my_str[0] != "-":
                    my_str = char + my_str
        return my_str

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'


def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
    if dvalue == 0.0:
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
       configurations and where holes are filled by 0.
       If idx is of type range, the deltas are not changed if idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
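
    Examples
    --------
    A doctest-style sketch; holes in idx are filled with zeros:

    >>> _expand_deltas(np.array([1.0, 2.0, 3.0]), [1, 3, 4], 3, 1)
    array([1., 0., 2., 3.])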
    """
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
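
    Examples
    --------
    A doctest-style sketch of both possible return types:

    >>> _merge_idx([range(1, 11, 2), range(2, 11, 2)])
    range(1, 11)
    >>> _merge_idx([[1, 2, 3], [1, 2, 3, 5]])
    [1, 2, 3, 5]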
    """

    if _check_lists_equal(idl):
        return idl[0]

    idunion = sorted(set().union(*idl))

    # Check whether idunion can be expressed as range
    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
    idtest = [list(idrange), idunion]
    if _check_lists_equal(idtest):
        return idrange

    return idunion


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
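
    Examples
    --------
    A doctest-style sketch; the intersection is returned as a range if possible:

    >>> _intersection_idx([range(1, 100), range(10, 20)])
    range(10, 20)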
    """

    if _check_lists_equal(idl):
        return idl[0]

    idinter = sorted(set.intersection(*[set(o) for o in idl]))

    # Check whether idinter can be expressed as range
    try:
        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
        idtest = [list(idrange), idinter]
        if _check_lists_equal(idtest):
            return idrange
    except IndexError:
        pass

    return idinter


def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
       New, empty entries are filled by 0. If idx and new_idx are of type range, the greatest
       common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the Jacobian
        of func. Use cautiously; supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
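
    For functions that go beyond elementary arithmetic the autograd wrapper
    of numpy has to be used inside func, e.g. (a sketch with hypothetical
    observables obs1 and obs2):

    import autograd.numpy as anp
    new_obs = derived_observable(lambda x: anp.log(x[0] ** 2 / x[1]), [obs1, obs2])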
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

1198    final_result = np.zeros(new_values.shape, dtype=object)
1199
1200    if array_mode is True:
1201
1202        class _Zero_grad():
1203            def __init__(self, N):
1204                self.grad = np.zeros((N, 1))
1205
1206        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1207        d_extracted = {}
1208        g_extracted = {}
1209        for name in new_sample_names:
1210            d_extracted[name] = []
1211            ens_length = len(new_idl_d[name])
1212            for i_dat, dat in enumerate(data):
1213                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1214        for name in new_cov_names:
1215            g_extracted[name] = []
1216            zero_grad = _Zero_grad(new_covobs_lengths[name])
1217            for i_dat, dat in enumerate(data):
1218                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1219
1220    for i_val, new_val in np.ndenumerate(new_values):
1221        new_deltas = {}
1222        new_grad = {}
1223        if array_mode is True:
1224            for name in new_sample_names:
1225                ens_length = d_extracted[name][0].shape[-1]
1226                new_deltas[name] = np.zeros(ens_length)
1227                for i_dat, dat in enumerate(d_extracted[name]):
1228                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1229            for name in new_cov_names:
1230                new_grad[name] = 0
1231                for i_dat, dat in enumerate(g_extracted[name]):
1232                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1233        else:
1234            for j_obs, obs in np.ndenumerate(data):
1235                for name in obs.names:
1236                    if name in obs.cov_names:
1237                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1238                    else:
1239                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1240
1241        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1242
1243        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1244            raise Exception('The same name has been used for deltas and covobs!')
1245        new_samples = []
1246        new_means = []
1247        new_idl = []
1248        new_names_obs = []
1249        for name in new_names:
1250            if name not in new_covobs:
1251                new_samples.append(new_deltas[name])
1252                new_idl.append(new_idl_d[name])
1253                new_means.append(new_r_values[name][i_val])
1254                new_names_obs.append(name)
1255        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1256        for name in new_covobs:
1257            final_result[i_val].names.append(name)
1258        final_result[i_val]._covobs = new_covobs
1259        final_result[i_val]._value = new_val
1260        final_result[i_val].reweighted = reweighted
1261
1262    if multi == 0:
1263        final_result = final_result.item()
1264
1265    return final_result
1266
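# --- Usage sketch (not part of the module source) ---
# Minimal example for derived_observable, assuming pyerrors is installed and
# importable as pe; the ensemble name 'ens1' is a placeholder.
import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.normal(1.0, 0.1, 1000)], ['ens1'])

# Default branch: the Jacobian of func is obtained via autograd.
res_auto = pe.derived_observable(lambda x, **kwargs: x[0] ** 2, [obs])

# 'man_grad' branch: the derivative is supplied by hand, skipping autograd.
res_man = pe.derived_observable(lambda x, **kwargs: x[0] ** 2, [obs],
                                man_grad=[2 * obs.value])

res_auto.gamma_method()
res_man.gamma_method()  # both error estimates agree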
1267
1268def _reduce_deltas(deltas, idx_old, idx_new):
1269    """Extract deltas defined on idx_old on all configs of idx_new.
1270
1271    Assumes that idx_old and idx_new are correctly defined idl, i.e., that
1272    they are ordered in ascending order.
1273
1274    Parameters
1275    ----------
1276    deltas : list
1277        List of fluctuations
1278    idx_old : list
1279        List or range of configs on which the deltas are defined
1280    idx_new : list
1281        List of configs for which we want to extract the deltas.
1282        Has to be a subset of idx_old.
1283    """
1284    if len(deltas) != len(idx_old):
1285        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1286    if type(idx_old) is range and type(idx_new) is range:
1287        if idx_old == idx_new:
1288            return deltas
1289    if _check_lists_equal([idx_old, idx_new]):
1290        return deltas
1291    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1292    if len(indices) < len(idx_new):
1293        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1294    return np.array(deltas)[indices]
1295
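# --- Usage sketch (not part of the module source) ---
# _reduce_deltas is a private helper; the call below only illustrates its
# contract: keep the fluctuations belonging to a subset of the configurations.
import numpy as np

deltas = np.arange(10.0)                        # defined on configs 1..10
sub = _reduce_deltas(deltas, range(1, 11), [2, 4, 6])
# sub == array([1., 3., 5.]): configs 2, 4 and 6 sit at positions 1, 3 and 5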
1296
1297def reweight(weight, obs, **kwargs):
1298    """Reweight a list of observables.
1299
1300    Parameters
1301    ----------
1302    weight : Obs
1303        Reweighting factor. An Observable that has to be defined on a superset of the
1304        configurations in obs[i].idl for all i.
1305    obs : list
1306        list of Obs, e.g. [obs1, obs2, obs3].
1307    all_configs : bool
1308        if True, the reweighted observables are normalized by the average of
1309        the reweighting factor on all configurations in weight.idl and not
1310        on the configurations in obs[i].idl. Default False.
1311    """
1312    result = []
1313    for i in range(len(obs)):
1314        if len(obs[i].cov_names):
1315            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1316        if not set(obs[i].names).issubset(weight.names):
1317            raise Exception('Error: Ensembles do not fit')
1318        for name in obs[i].names:
1319            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1320                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1321        new_samples = []
1322        w_deltas = {}
1323        for name in sorted(obs[i].names):
1324            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1325            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1326        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1327
1328        if kwargs.get('all_configs'):
1329            new_weight = weight
1330        else:
1331            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1332
1333        result.append(tmp_obs / new_weight)
1334        result[-1].reweighted = True
1335
1336    return result
1337
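# --- Usage sketch (not part of the module source) ---
# Reweighting an observable, assuming pyerrors is importable as pe;
# 'ens1' is a placeholder ensemble name.
import numpy as np
import pyerrors as pe

w = pe.Obs([np.random.normal(1.0, 0.01, 1000)], ['ens1'])  # reweighting factor
o = pe.Obs([np.random.normal(0.5, 0.05, 1000)], ['ens1'])

o_rw = pe.reweight(w, [o])[0]  # per ensemble: <w * o> / <w>
o_rw.gamma_method()
assert o_rw.reweighted is True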
1338
1339def correlate(obs_a, obs_b):
1340    """Correlate two observables.
1341
1342    Parameters
1343    ----------
1344    obs_a : Obs
1345        First observable
1346    obs_b : Obs
1347        Second observable
1348
1349    Notes
1350    -----
1351    Only correlate primary observables which have not been reweighted yet.
1352    The reweighting has to be applied after correlating the observables.
1353    Currently only works if ensembles are identical (this is not strictly necessary).
1354    """
1355
1356    if sorted(obs_a.names) != sorted(obs_b.names):
1357        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1358    if len(obs_a.cov_names) or len(obs_b.cov_names):
1359        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1360    for name in obs_a.names:
1361        if obs_a.shape[name] != obs_b.shape[name]:
1362            raise Exception(f"Shapes of ensemble {name} do not fit")
1363        if obs_a.idl[name] != obs_b.idl[name]:
1364            raise Exception(f"idl of ensemble {name} do not fit")
1365
1366    if obs_a.reweighted is True:
1367        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1368    if obs_b.reweighted is True:
1369        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1370
1371    new_samples = []
1372    new_idl = []
1373    for name in sorted(obs_a.names):
1374        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1375        new_idl.append(obs_a.idl[name])
1376
1377    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1378    o.reweighted = obs_a.reweighted or obs_b.reweighted
1379    return o
1380
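# --- Usage sketch (not part of the module source) ---
# Element-wise correlation of two primary observables defined on identical
# configurations, assuming pyerrors is importable as pe.
import numpy as np
import pyerrors as pe

a = pe.Obs([np.random.normal(1.0, 0.1, 1000)], ['ens1'])
b = pe.Obs([np.random.normal(2.0, 0.1, 1000)], ['ens1'])

ab = pe.correlate(a, b)  # sample-wise product <a_s b_s>, not the product a * b
ab.gamma_method()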
1381
1382def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1383    r'''Calculates the error covariance matrix of a set of observables.
1384
1385    WARNING: This function should be used with care, especially for observables with support on multiple
1386             ensembles with differing autocorrelations. See the notes below for details.
1387
1388    The gamma method has to be applied first to all observables.
1389
1390    Parameters
1391    ----------
1392    obs : list or numpy.ndarray
1393        List or one dimensional array of Obs
1394    visualize : bool
1395        If True plots the corresponding normalized correlation matrix (default False).
1396    correlation : bool
1397        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1398    smooth : None or int
1399        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1400        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1401        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1402        small ones.
1403
1404    Notes
1405    -----
1406    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1407    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1408    in the absence of autocorrelation.
1409    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1410    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1411    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1412    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1413    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1414    '''
1415
1416    length = len(obs)
1417
1418    max_samples = np.max([o.N for o in obs])
1419    if max_samples <= length and not any(o.cov_names for o in obs):
1420        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1421
1422    cov = np.zeros((length, length))
1423    for i in range(length):
1424        for j in range(i, length):
1425            cov[i, j] = _covariance_element(obs[i], obs[j])
1426    cov = cov + cov.T - np.diag(np.diag(cov))
1427
1428    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1429
1430    if isinstance(smooth, int):
1431        corr = _smooth_eigenvalues(corr, smooth)
1432
1433    if visualize:
1434        plt.matshow(corr, vmin=-1, vmax=1)
1435        plt.set_cmap('RdBu')
1436        plt.colorbar()
1437        plt.draw()
1438
1439    if correlation is True:
1440        return corr
1441
1442    errors = [o.dvalue for o in obs]
1443    cov = np.diag(errors) @ corr @ np.diag(errors)
1444
1445    eigenvalues = np.linalg.eigh(cov)[0]
1446    if not np.all(eigenvalues >= 0):
1447        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1448
1449    return cov
1450
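# --- Usage sketch (not part of the module source) ---
# The gamma method has to be applied to every Obs before calling covariance.
# Assumes pyerrors is importable as pe; 'ens1' is a placeholder.
import numpy as np
import pyerrors as pe

base = np.random.normal(0.0, 1.0, 1000)
a = pe.Obs([base], ['ens1'])
b = pe.Obs([base + np.random.normal(0.0, 0.5, 1000)], ['ens1'])
a.gamma_method()
b.gamma_method()

cov = pe.covariance([a, b])                      # 2x2 error covariance
corr = pe.covariance([a, b], correlation=True)   # unit diagonal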
1451
1452def _smooth_eigenvalues(corr, E):
1453    """Eigenvalue smoothing as described in hep-lat/9412087
1454
1455    corr : np.ndarray
1456        correlation matrix
1457    E : integer
1458        Number of eigenvalues to be left substantially unchanged
1459    """
1460    if not (2 < E < corr.shape[0] - 1):
1461        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1462    vals, vec = np.linalg.eigh(corr)
1463    lambda_min = np.mean(vals[:-E])
1464    vals[vals < lambda_min] = lambda_min
1465    vals /= np.mean(vals)
1466    return vec @ np.diag(vals) @ vec.T
1467
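# --- Usage sketch (not part of the module source) ---
# Eigenvalue smoothing is reached through the 'smooth' keyword of covariance.
# With six observables and smooth=4 the four largest eigenvalues of the
# correlation matrix are left essentially unchanged.
import numpy as np
import pyerrors as pe

obs_list = [pe.Obs([np.random.normal(i, 0.1, 500)], ['ens1']) for i in range(6)]
for o in obs_list:
    o.gamma_method()

cov_smoothed = pe.covariance(obs_list, smooth=4)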
1468
1469def _covariance_element(obs1, obs2):
1470    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1471
1472    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1473        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1474        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1475        return np.sum(deltas1 * deltas2)
1476
1477    if set(obs1.names).isdisjoint(set(obs2.names)):
1478        return 0.0
1479
1480    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1481        raise Exception('The gamma method has to be applied to both Obs first.')
1482
1483    dvalue = 0.0
1484
1485    for e_name in obs1.mc_names:
1486
1487        if e_name not in obs2.mc_names:
1488            continue
1489
1490        idl_d = {}
1491        for r_name in obs1.e_content[e_name]:
1492            if r_name not in obs2.e_content[e_name]:
1493                continue
1494            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1495
1496        gamma = 0.0
1497
1498        for r_name in obs1.e_content[e_name]:
1499            if r_name not in obs2.e_content[e_name]:
1500                continue
1501            if len(idl_d[r_name]) == 0:
1502                continue
1503            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1504
1505        if gamma == 0.0:
1506            continue
1507
1508        gamma_div = 0.0
1509        for r_name in obs1.e_content[e_name]:
1510            if r_name not in obs2.e_content[e_name]:
1511                continue
1512            if len(idl_d[r_name]) == 0:
1513                continue
1514            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1515        gamma /= gamma_div
1516
1517        dvalue += gamma
1518
1519    for e_name in obs1.cov_names:
1520
1521        if e_name not in obs2.cov_names:
1522            continue
1523
1524        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1525
1526    return dvalue
1527
1528
1529def import_jackknife(jacks, name, idl=None):
1530    """Imports jackknife samples and returns an Obs
1531
1532    Parameters
1533    ----------
1534    jacks : numpy.ndarray
1535        numpy array containing the mean value as zeroth entry and
1536        the N jackknife samples as first to Nth entry.
1537    name : str
1538        name of the ensemble the samples are defined on.
1539    """
1540    length = len(jacks) - 1
1541    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1542    samples = jacks[1:] @ prj
1543    mean = np.mean(samples)
1544    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1545    new_obs._value = jacks[0]
1546    return new_obs
1547
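# --- Usage sketch (not part of the module source) ---
# Round trip through the jackknife representation, assuming pyerrors is
# importable as pe. export_jackknife is defined on Obs further up.
import numpy as np
import pyerrors as pe

o = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens1'])
jacks = o.export_jackknife()            # [mean, N jackknife samples]
o2 = pe.import_jackknife(jacks, 'ens1')

o.gamma_method(S=0)   # S=0: no autocorrelation assumed
o2.gamma_method(S=0)
# o.value == o2.value and the errors agree up to machine precision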
1548
1549def merge_obs(list_of_obs):
1550    """Combine all observables in list_of_obs into one new observable
1551
1552    Parameters
1553    ----------
1554    list_of_obs : list
1555        list of the Obs object to be combined
1556
1557    Notes
1558    -----
1559    It is not possible to combine obs which are based on the same replicum
1560    """
1561    replist = [item for obs in list_of_obs for item in obs.names]
1562    if len(replist) != len(set(replist)):
1563        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1564    if any([len(o.cov_names) for o in list_of_obs]):
1565        raise Exception('Not possible to merge data that contains covobs!')
1566    new_dict = {}
1567    idl_dict = {}
1568    for o in list_of_obs:
1569        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1570                        for key in set(o.deltas) | set(o.r_values)})
1571        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1572
1573    names = sorted(new_dict.keys())
1574    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1575    o.reweighted = any(oi.reweighted for oi in list_of_obs)
1576    return o
1577
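# --- Usage sketch (not part of the module source) ---
# Combine measurements from two disjoint replica of the same ensemble,
# assuming pyerrors is importable as pe. The names follow the
# 'ensemble|replicum' convention used throughout pyerrors.
import numpy as np
import pyerrors as pe

o1 = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ens1|r01'])
o2 = pe.Obs([np.random.normal(1.0, 0.1, 400)], ['ens1|r02'])

merged = pe.merge_obs([o1, o2])  # one Obs spanning both replica
merged.gamma_method()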
1578
1579def cov_Obs(means, cov, name, grad=None):
1580    """Create an Obs based on mean(s) and a covariance matrix
1581
1582    Parameters
1583    ----------
1584    means : list of floats or float
1585        N mean value(s) of the new Obs
1586    cov : list or array
1587        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1588    name : str
1589        identifier for the covariance matrix
1590    grad : list or array
1591        Gradient of the Covobs wrt. the means belonging to cov.
1592    """
1593
1594    def covobs_to_obs(co):
1595        """Make an Obs out of a Covobs
1596
1597        Parameters
1598        ----------
1599        co : Covobs
1600            Covobs to be embedded into the Obs
1601        """
1602        o = Obs([], [], means=[])
1603        o._value = co.value
1604        o.names.append(co.name)
1605        o._covobs[co.name] = co
1606        o._dvalue = np.sqrt(co.errsq())
1607        return o
1608
1609    ol = []
1610    if isinstance(means, (float, int)):
1611        means = [means]
1612
1613    for i in range(len(means)):
1614        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1615    if ol[0].covobs[name].N != len(means):
1616        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1617    if len(ol) == 1:
1618        return ol[0]
1619    return ol
1620
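# --- Usage sketch (not part of the module source) ---
# Wrap external input with a Gaussian error into Obs objects, assuming
# pyerrors is importable as pe; the names 'Z' and 'pair' are placeholders.
import pyerrors as pe

z = pe.cov_Obs(1.93, 0.02 ** 2, 'Z')  # scalar mean and 0d covariance
z.gamma_method()

# Two correlated inputs with a 2x2 covariance matrix return a list of Obs.
p1, p2 = pe.cov_Obs([0.7, 1.2], [[4e-4, 1e-4], [1e-4, 9e-4]], 'pair')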
1621
1622def _determine_gap(o, e_content, e_name):
1623    gaps = []
1624    for r_name in e_content[e_name]:
1625        if isinstance(o.idl[r_name], range):
1626            gaps.append(o.idl[r_name].step)
1627        else:
1628            gaps.append(np.min(np.diff(o.idl[r_name])))
1629
1630    gap = min(gaps)
1631    if not np.all([gi % gap == 0 for gi in gaps]):
1632        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1633
1634    return gap
1635
1636
1637def _check_lists_equal(idl):
1638    '''
1639    Use groupby to efficiently check whether all elements of idl are identical.
1640    Returns True if all elements are equal, otherwise False.
1641
1642    Parameters
1643    ----------
1644    idl : list of lists, ranges or np.ndarrays
1645    '''
1646    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1647    if next(g, True) and not next(g, False):
1648        return True
1649    return False
class Obs:
 18class Obs:
 19    """Class for a general observable.
 20
 21    Instances of Obs are the basic objects of a pyerrors error analysis.
 22    They are initialized with a list which contains arrays of samples for
 23    different ensembles/replica and another list of same length which contains
 24    the names of the ensembles/replica. Mathematical operations can be
 25    performed on instances. The result is another instance of Obs. The error of
 26    an instance can be computed with the gamma_method. Also contains additional
 27    methods for output and visualization of the error calculation.
 28
 29    Attributes
 30    ----------
 31    S_global : float
 32        Standard value for S (default 2.0)
 33    S_dict : dict
 34        Dictionary for S values. If an entry for a given ensemble
 35        exists this overwrites the standard value for that ensemble.
 36    tau_exp_global : float
 37        Standard value for tau_exp (default 0.0)
 38    tau_exp_dict : dict
 39        Dictionary for tau_exp values. If an entry for a given ensemble exists
 40        this overwrites the standard value for that ensemble.
 41    N_sigma_global : float
 42        Standard value for N_sigma (default 1.0)
 43    N_sigma_dict : dict
 44        Dictionary for N_sigma values. If an entry for a given ensemble exists
 45        this overwrites the standard value for that ensemble.
 46    """
 47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 51                 'idl', 'tag', '_covobs', '__dict__']
 52
 53    S_global = 2.0
 54    S_dict = {}
 55    tau_exp_global = 0.0
 56    tau_exp_dict = {}
 57    N_sigma_global = 1.0
 58    N_sigma_dict = {}
 59
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise ValueError('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise ValueError('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise ValueError('Names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise ValueError('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise TypeError('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None
140
141    @property
142    def value(self):
143        return self._value
144
145    @property
146    def dvalue(self):
147        return self._dvalue
148
149    @property
150    def e_names(self):
151        return sorted(set([o.split('|')[0] for o in self.names]))
152
153    @property
154    def cov_names(self):
155        return sorted(set([o for o in self.covobs.keys()]))
156
157    @property
158    def mc_names(self):
159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
160
161    @property
162    def e_content(self):
163        res = {}
164        for e, e_name in enumerate(self.e_names):
165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
166            if e_name in self.names:
167                res[e_name].append(e_name)
168        return res
169
170    @property
171    def covobs(self):
172        return self._covobs
173
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            gapsize = _determine_gap(self, e_content, e_name)
241
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
274            # Make sure no entry of tauint is smaller than 0.5
275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
276            # hep-lat/0306017 eq. (42)
277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
278            self.e_n_dtauint[e_name][0] = 0.0
279
280            def _compute_drho(i):
281                tmp = (self.e_rho[e_name][i + 1:w_max]
282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 <= 0 else 2 * (i - w_max // 2):-1],
283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
286
287            if self.tau_exp[e_name] > 0:
288                _compute_drho(1)
289                texp = self.tau_exp[e_name]
290                # Critical slowing down analysis
291                if w_max // 2 <= 1:
292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
293                for n in range(1, w_max // 2):
294                    _compute_drho(n + 1)
295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
296                        # Bias correction hep-lat/0306017 eq. (49) included
297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
302                        self.e_windowsize[e_name] = n
303                        break
304            else:
305                if self.S[e_name] == 0.0:
306                    self.e_tauint[e_name] = 0.5
307                    self.e_dtauint[e_name] = 0.0
308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
310                    self.e_windowsize[e_name] = 0
311                else:
312                    # Standard automatic windowing procedure
313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
315                    for n in range(1, w_max):
316                        if g_w[n - 1] < 0 or n >= w_max - 1:
317                            _compute_drho(n)
318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
322                            self.e_windowsize[e_name] = n
323                            break
324
325            self._dvalue += self.e_dvalue[e_name] ** 2
326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
327
328        for e_name in self.cov_names:
329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
330            self.e_ddvalue[e_name] = 0
331            self._dvalue += self.e_dvalue[e_name]**2
332
333        self._dvalue = np.sqrt(self._dvalue)
334        if self._dvalue == 0.0:
335            self.ddvalue = 0.0
336        else:
337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
338        return
339
340    gm = gamma_method
341
342    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
343        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
344           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
345
346        Parameters
347        ----------
348        deltas : list
349            List of fluctuations
350        idx : list
351            List or range of configurations on which the deltas are defined.
352        shape : int
353            Number of configurations in idx.
354        w_max : int
355            Upper bound for the summation window.
356        fft : bool
357            determines whether the fft algorithm is used for the computation
358            of the autocorrelation function.
359        gapsize : int
360            The target distance between two configurations. If longer distances
361            are found in idx, the data is expanded.
362        """
363        gamma = np.zeros(w_max)
364        deltas = _expand_deltas(deltas, idx, shape, gapsize)
365        new_shape = len(deltas)
366        if fft:
367            max_gamma = min(new_shape, w_max)
368            # The padding for the fft has to be even
369            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
370            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
371        else:
372            for n in range(w_max):
373                if new_shape - n >= 0:
374                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
375
376        return gamma
377
378    def details(self, ens_content=True):
379        """Output detailed properties of the Obs.
380
381        Parameters
382        ----------
383        ens_content : bool
384            print details about the ensembles and replica if true.
385        """
386        if self.tag is not None:
387            print("Description:", self.tag)
388        if not hasattr(self, 'e_dvalue'):
389            print('Result\t %3.8e' % (self.value))
390        else:
391            if self.value == 0.0:
392                percentage = np.nan
393            else:
394                percentage = np.abs(self._dvalue / self.value) * 100
395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
396            if len(self.e_names) > 1:
397                print(' Ensemble errors:')
398            e_content = self.e_content
399            for e_name in self.mc_names:
400                gap = _determine_gap(self, e_content, e_name)
401
402                if len(self.e_names) > 1:
403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
405                tau_string += f" in units of {gap} config"
406                if gap > 1:
407                    tau_string += "s"
408                if self.tau_exp[e_name] > 0:
409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
410                else:
411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
412                print(tau_string)
413            for e_name in self.cov_names:
414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
415        if ens_content is True:
416            if len(self.e_names) == 1:
417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
418            else:
419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
420            my_string_list = []
421            for key, value in sorted(self.e_content.items()):
422                if key not in self.covobs:
423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
424                    if len(value) == 1:
425                        my_string += f': {self.shape[value[0]]} configurations'
426                        if isinstance(self.idl[value[0]], range):
427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
428                        else:
429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
430                    else:
431                        sublist = []
432                        for v in value:
433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
434                            my_substring += f': {self.shape[v]} configurations'
435                            if isinstance(self.idl[v], range):
436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
437                            else:
438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
439                            sublist.append(my_substring)
440
441                        my_string += '\n' + '\n'.join(sublist)
442                else:
443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
444                my_string_list.append(my_string)
445            print('\n'.join(my_string_list))
446
447    def reweight(self, weight):
448        """Reweight the obs with given rewighting factors.
449
450        Parameters
451        ----------
452        weight : Obs
453            Reweighting factor. An Observable that has to be defined on a superset of the
454            configurations in obs[i].idl for all i.
455        all_configs : bool
456            if True, the reweighted observables are normalized by the average of
457            the reweighting factor on all configurations in weight.idl and not
458            on the configurations in obs[i].idl. Default False.
459        """
460        return reweight(weight, [self])[0]
461
462    def is_zero_within_error(self, sigma=1):
463        """Checks whether the observable is zero within 'sigma' standard errors.
464
465        Parameters
466        ----------
467        sigma : int
468            Number of standard errors used for the check.
469
470        Works only properly when the gamma method was run.
471        """
472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
473
474    def is_zero(self, atol=1e-10):
475        """Checks whether the observable is zero within a given tolerance.
476
477        Parameters
478        ----------
479        atol : float
480            Absolute tolerance (for details see numpy documentation).
481        """
482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
483
484    def plot_tauint(self, save=None):
485        """Plot integrated autocorrelation time for each ensemble.
486
487        Parameters
488        ----------
489        save : str
490            saves the figure to a file named 'save' if.
491        """
492        if not hasattr(self, 'e_dvalue'):
493            raise Exception('Run the gamma method first.')
494
495        for e, e_name in enumerate(self.mc_names):
496            fig = plt.figure()
497            plt.xlabel(r'$W$')
498            plt.ylabel(r'$\tau_\mathrm{int}$')
499            length = int(len(self.e_n_tauint[e_name]))
500            if self.tau_exp[e_name] > 0:
501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
502                x_help = np.arange(2 * self.tau_exp[e_name])
503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
510            else:
511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513
514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
516            plt.legend()
517            plt.xlim(-0.5, xmax)
518            ylim = plt.ylim()
519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
520            plt.draw()
521            if save:
522                fig.savefig(save + "_" + str(e))
523
524    def plot_rho(self, save=None):
525        """Plot normalized autocorrelation function time for each ensemble.
526
527        Parameters
528        ----------
529        save : str
530            saves the figure to a file named 'save' if.
531        """
532        if not hasattr(self, 'e_dvalue'):
533            raise Exception('Run the gamma method first.')
534        for e, e_name in enumerate(self.mc_names):
535            fig = plt.figure()
536            plt.xlabel('W')
537            plt.ylabel('rho')
538            length = int(len(self.e_drho[e_name]))
539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
541            if self.tau_exp[e_name] > 0:
542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
546            else:
547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
550            plt.xlim(-0.5, xmax)
551            plt.draw()
552            if save:
553                fig.savefig(save + "_" + str(e))
554
555    def plot_rep_dist(self):
556        """Plot replica distribution for each ensemble with more than one replicum."""
557        if not hasattr(self, 'e_dvalue'):
558            raise Exception('Run the gamma method first.')
559        for e, e_name in enumerate(self.mc_names):
560            if len(self.e_content[e_name]) == 1:
561                print('No replica distribution for a single replicum (', e_name, ')')
562                continue
563            r_length = []
564            sub_r_mean = 0
565            for r, r_name in enumerate(self.e_content[e_name]):
566                r_length.append(len(self.deltas[r_name]))
567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
568            e_N = np.sum(r_length)
569            sub_r_mean /= e_N
570            arr = np.zeros(len(self.e_content[e_name]))
571            for r, r_name in enumerate(self.e_content[e_name]):
572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
575            plt.draw()
576
577    def plot_history(self, expand=True):
578        """Plot derived Monte Carlo history for each ensemble
579
580        Parameters
581        ----------
582        expand : bool
583            show expanded history for irregular Monte Carlo chains (default: True).
584        """
585        for e, e_name in enumerate(self.mc_names):
586            plt.figure()
587            r_length = []
588            tmp = []
589            tmp_expanded = []
590            for r, r_name in enumerate(self.e_content[e_name]):
591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
592                if expand:
593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
594                    r_length.append(len(tmp_expanded[-1]))
595                else:
596                    r_length.append(len(tmp[-1]))
597            e_N = np.sum(r_length)
598            x = np.arange(e_N)
599            y_test = np.concatenate(tmp, axis=0)
600            if expand:
601                y = np.concatenate(tmp_expanded, axis=0)
602            else:
603                y = y_test
604            plt.errorbar(x, y, fmt='.', markersize=3)
605            plt.xlim(-0.5, e_N - 0.5)
606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
607            plt.draw()
608
609    def plot_piechart(self, save=None):
610        """Plot piechart which shows the fractional contribution of each
611        ensemble to the error and returns a dictionary containing the fractions.
612
613        Parameters
614        ----------
615        save : str
616            saves the figure to a file named 'save' if.
617        """
618        if not hasattr(self, 'e_dvalue'):
619            raise Exception('Run the gamma method first.')
620        if np.isclose(0.0, self._dvalue, atol=1e-15):
621            raise Exception('Error is 0.0')
622        labels = self.e_names
623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
624        fig1, ax1 = plt.subplots()
625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
626        ax1.axis('equal')
627        plt.draw()
628        if save:
629            fig1.savefig(save)
630
631        return dict(zip(self.e_names, sizes))
632
633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
634        """Dump the Obs to a file 'name' of chosen format.
635
636        Parameters
637        ----------
638        filename : str
639            name of the file to be saved.
640        datatype : str
641            Format of the exported file. Supported formats include
642            "json.gz" and "pickle"
643        description : str
644            Description for output file, only relevant for json.gz format.
645        path : str
646            specifies a custom path for the file (default '.')
647        """
648        if 'path' in kwargs:
649            file_name = kwargs.get('path') + '/' + filename
650        else:
651            file_name = filename
652
653        if datatype == "json.gz":
654            from .input.json import dump_to_json
655            dump_to_json([self], file_name, description=description)
656        elif datatype == "pickle":
657            with open(file_name + '.p', 'wb') as fb:
658                pickle.dump(self, fb)
659        else:
660            raise Exception("Unknown datatype " + str(datatype))
661
662    def export_jackknife(self):
663        """Export jackknife samples from the Obs
664
665        Returns
666        -------
667        numpy.ndarray
668            Returns a numpy array of length N + 1 where N is the number of samples
669            for the given ensemble and replicum. The zeroth entry of the array contains
670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
671            derived from the Obs. The current implementation only works for observables
672            defined on exactly one ensemble and replicum. The derived jackknife samples
673            should agree with samples from a full jackknife analysis up to O(1/N).
674        """
675
676        if len(self.names) != 1:
677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
678
679        name = self.names[0]
680        full_data = self.deltas[name] + self.r_values[name]
681        n = full_data.size
682        mean = self.value
683        tmp_jacks = np.zeros(n + 1)
684        tmp_jacks[0] = mean
685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
686        return tmp_jacks
687
688    def __float__(self):
689        return float(self.value)
690
691    def __repr__(self):
692        return 'Obs[' + str(self) + ']'
693
694    def __str__(self):
695        return _format_uncertainty(self.value, self._dvalue)
696
697    def __format__(self, format_type):
698        my_str = _format_uncertainty(self.value, self._dvalue,
699                                     significance=int(float(format_type.replace("+", "").replace("-", ""))))
700        for char in ["+", " "]:
701            if format_type.startswith(char):
702                if my_str[0] != "-":
703                    my_str = char + my_str
704        return my_str
705
706    def __hash__(self):
707        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
708        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
709        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
710        hash_tuple += tuple([o.encode() for o in self.names])
711        m = hashlib.md5()
712        [m.update(o) for o in hash_tuple]
713        return int(m.hexdigest(), 16) & 0xFFFFFFFF
714
715    # Overload comparisons
716    def __lt__(self, other):
717        return self.value < other
718
719    def __le__(self, other):
720        return self.value <= other
721
722    def __gt__(self, other):
723        return self.value > other
724
725    def __ge__(self, other):
726        return self.value >= other
727
728    def __eq__(self, other):
729        return (self - other).is_zero()
730
731    def __ne__(self, other):
732        return not (self - other).is_zero()
733
734    # Overload math operations
735    def __add__(self, y):
736        if isinstance(y, Obs):
737            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
738        else:
739            if isinstance(y, np.ndarray):
740                return np.array([self + o for o in y])
741            elif y.__class__.__name__ in ['Corr', 'CObs']:
742                return NotImplemented
743            else:
744                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
745
746    def __radd__(self, y):
747        return self + y
748
749    def __mul__(self, y):
750        if isinstance(y, Obs):
751            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
752        else:
753            if isinstance(y, np.ndarray):
754                return np.array([self * o for o in y])
755            elif isinstance(y, complex):
756                return CObs(self * y.real, self * y.imag)
757            elif y.__class__.__name__ in ['Corr', 'CObs']:
758                return NotImplemented
759            else:
760                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
761
762    def __rmul__(self, y):
763        return self * y
764
765    def __sub__(self, y):
766        if isinstance(y, Obs):
767            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
768        else:
769            if isinstance(y, np.ndarray):
770                return np.array([self - o for o in y])
771            elif y.__class__.__name__ in ['Corr', 'CObs']:
772                return NotImplemented
773            else:
774                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
775
776    def __rsub__(self, y):
777        return -1 * (self - y)
778
779    def __pos__(self):
780        return self
781
782    def __neg__(self):
783        return -1 * self
784
785    def __truediv__(self, y):
786        if isinstance(y, Obs):
787            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
788        else:
789            if isinstance(y, np.ndarray):
790                return np.array([self / o for o in y])
791            elif y.__class__.__name__ in ['Corr', 'CObs']:
792                return NotImplemented
793            else:
794                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
795
796    def __rtruediv__(self, y):
797        if isinstance(y, Obs):
798            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
799        else:
800            if isinstance(y, np.ndarray):
801                return np.array([o / self for o in y])
802            elif y.__class__.__name__ in ['Corr', 'CObs']:
803                return NotImplemented
804            else:
805                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
806
807    def __pow__(self, y):
808        if isinstance(y, Obs):
809            return derived_observable(lambda x: x[0] ** x[1], [self, y])
810        else:
811            return derived_observable(lambda x: x[0] ** y, [self])
812
813    def __rpow__(self, y):
814        if isinstance(y, Obs):
815            return derived_observable(lambda x: x[0] ** x[1], [y, self])
816        else:
817            return derived_observable(lambda x: y ** x[0], [self])
818
819    def __abs__(self):
820        return derived_observable(lambda x: anp.abs(x[0]), [self])
821
822    # Overload numpy functions
823    def sqrt(self):
824        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
825
826    def log(self):
827        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
828
829    def exp(self):
830        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
831
832    def sin(self):
833        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
834
835    def cos(self):
836        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
837
838    def tan(self):
839        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
840
841    def arcsin(self):
842        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
843
844    def arccos(self):
845        return derived_observable(lambda x: anp.arccos(x[0]), [self])
846
847    def arctan(self):
848        return derived_observable(lambda x: anp.arctan(x[0]), [self])
849
850    def sinh(self):
851        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
852
853    def cosh(self):
854        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
855
856    def tanh(self):
857        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
858
859    def arcsinh(self):
860        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
861
862    def arccosh(self):
863        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
864
865    def arctanh(self):
866        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
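These class-level defaults can be adjusted before the error analysis is run; a minimal sketch (the ensemble name 'ensemble1' is a placeholder):

    import pyerrors as pe

    pe.Obs.S_global = 1.5                    # new default S for all ensembles
    pe.Obs.S_dict['ensemble1'] = 3.0         # override S for one specific ensemble
    pe.Obs.tau_exp_dict['ensemble1'] = 10.0  # trigger the tau_exp analysis for it
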
Obs(samples, names, idl=None, **kwargs)
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise ValueError('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise ValueError('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise ValueError('Names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise ValueError('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise TypeError('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
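A minimal construction sketch with synthetic data (the ensemble name and the Gaussian samples are placeholders):

    import numpy as np
    import pyerrors as pe

    samples = np.random.normal(1.0, 0.1, 1000)  # synthetic Monte Carlo chain
    my_obs = pe.Obs([samples], ['ensemble1'])   # idl defaults to range(1, 1001)

    # every second configuration only, with an explicit idl
    thinned = pe.Obs([samples[::2]], ['ensemble1'], idl=[range(1, 1001, 2)])
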
def gamma_method(self, **kwargs):
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimate coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger than or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            gapsize = _determine_gap(self, e_content, e_name)
241
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
274            # Make sure no entry of tauint is smaller than 0.5
275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
276            # hep-lat/0306017 eq. (42)
277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
278            self.e_n_dtauint[e_name][0] = 0.0
279
280            def _compute_drho(i):
281                tmp = (self.e_rho[e_name][i + 1:w_max]
282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 <= 0 else 2 * (i - w_max // 2):-1],
283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
286
287            if self.tau_exp[e_name] > 0:
288                _compute_drho(1)
289                texp = self.tau_exp[e_name]
290                # Critical slowing down analysis
291                if w_max // 2 <= 1:
292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
293                for n in range(1, w_max // 2):
294                    _compute_drho(n + 1)
295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
296                        # Bias correction hep-lat/0306017 eq. (49) included
297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
302                        self.e_windowsize[e_name] = n
303                        break
304            else:
305                if self.S[e_name] == 0.0:
306                    self.e_tauint[e_name] = 0.5
307                    self.e_dtauint[e_name] = 0.0
308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
310                    self.e_windowsize[e_name] = 0
311                else:
312                    # Standard automatic windowing procedure
313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
315                    for n in range(1, w_max):
316                        if g_w[n - 1] < 0 or n >= w_max - 1:
317                            _compute_drho(n)
318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
322                            self.e_windowsize[e_name] = n
323                            break
324
325            self._dvalue += self.e_dvalue[e_name] ** 2
326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
327
328        for e_name in self.cov_names:
329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
330            self.e_ddvalue[e_name] = 0
331            self._dvalue += self.e_dvalue[e_name]**2
332
333        self._dvalue = np.sqrt(self._dvalue)
334        if self._dvalue == 0.0:
335            self.ddvalue = 0.0
336        else:
337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
338        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
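Typical invocations, assuming my_obs was constructed as sketched above:

    my_obs.gamma_method()                       # automatic windowing, S=2.0
    print(my_obs.dvalue, my_obs.e_tauint['ensemble1'])

    my_obs.gamma_method(S=3.0)                  # larger summation window
    my_obs.gamma_method(tau_exp=10, N_sigma=2)  # critical slowing down analysis
    my_obs.gamma_method(S=0)                    # plain standard error, no autocorrelation
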
def gm(self, **kwargs):
Alias of gamma_method. The source, parameters and behavior are identical to gamma_method documented above.
def details(self, ens_content=True):
378    def details(self, ens_content=True):
379        """Output detailed properties of the Obs.
380
381        Parameters
382        ----------
383        ens_content : bool
384            print details about the ensembles and replica if true.
385        """
386        if self.tag is not None:
387            print("Description:", self.tag)
388        if not hasattr(self, 'e_dvalue'):
389            print('Result\t %3.8e' % (self.value))
390        else:
391            if self.value == 0.0:
392                percentage = np.nan
393            else:
394                percentage = np.abs(self._dvalue / self.value) * 100
395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
396            if len(self.e_names) > 1:
397                print(' Ensemble errors:')
398            e_content = self.e_content
399            for e_name in self.mc_names:
400                gap = _determine_gap(self, e_content, e_name)
401
402                if len(self.e_names) > 1:
403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
405                tau_string += f" in units of {gap} config"
406                if gap > 1:
407                    tau_string += "s"
408                if self.tau_exp[e_name] > 0:
409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
410                else:
411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
412                print(tau_string)
413            for e_name in self.cov_names:
414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
415        if ens_content is True:
416            if len(self.e_names) == 1:
417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
418            else:
419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
420            my_string_list = []
421            for key, value in sorted(self.e_content.items()):
422                if key not in self.covobs:
423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
424                    if len(value) == 1:
425                        my_string += f': {self.shape[value[0]]} configurations'
426                        if isinstance(self.idl[value[0]], range):
427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
428                        else:
429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
430                    else:
431                        sublist = []
432                        for v in value:
433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
434                            my_substring += f': {self.shape[v]} configurations'
435                            if isinstance(self.idl[v], range):
436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
437                            else:
438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
439                            sublist.append(my_substring)
440
441                        my_string += '\n' + '\n'.join(sublist)
442                else:
443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
444                my_string_list.append(my_string)
445            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
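For example:

    my_obs.gamma_method()
    my_obs.details()                   # value, error, tau_int and ensemble content
    my_obs.details(ens_content=False)  # suppress the per-ensemble listing
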
def reweight(self, weight):
447    def reweight(self, weight):
448        """Reweight the obs with a given reweighting factor.
449
450        Parameters
451        ----------
452        weight : Obs
453            Reweighting factor. An Observable that has to be defined on a superset of the
454            configurations in obs[i].idl for all i.
455        all_configs : bool
456            if True, the reweighted observables are normalized by the average of
457            the reweighting factor on all configurations in weight.idl and not
458            on the configurations in obs[i].idl. Default False.
459        """
460        return reweight(weight, [self])[0]

Reweight the obs with a given reweighting factor.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
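A sketch, assuming weight_obs is an Obs for the reweighting factor defined on a superset of the configurations of my_obs:

    r_obs = my_obs.reweight(weight_obs)
    r_obs.gamma_method()
    print(r_obs.reweighted)   # flag is set to True for reweighted observables
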
def is_zero_within_error(self, sigma=1):
462    def is_zero_within_error(self, sigma=1):
463        """Checks whether the observable is zero within 'sigma' standard errors.
464
465        Parameters
466        ----------
467        sigma : int
468            Number of standard errors used for the check.
469
470        Only works properly if the gamma method has been run.
471        """
472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
Only works properly if the gamma method has been run.
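For example, to check whether two (placeholder) observables obs1 and obs2 are compatible:

    diff = obs1 - obs2
    diff.gamma_method()       # required before the check
    compatible = diff.is_zero_within_error(sigma=2)
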
def is_zero(self, atol=1e-10):
474    def is_zero(self, atol=1e-10):
475        """Checks whether the observable is zero within a given tolerance.
476
477        Parameters
478        ----------
479        atol : float
480            Absolute tolerance (for details see numpy documentation).
481        """
482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
484    def plot_tauint(self, save=None):
485        """Plot integrated autocorrelation time for each ensemble.
486
487        Parameters
488        ----------
489        save : str
490            saves the figure to a file named 'save' if.
491        """
492        if not hasattr(self, 'e_dvalue'):
493            raise Exception('Run the gamma method first.')
494
495        for e, e_name in enumerate(self.mc_names):
496            fig = plt.figure()
497            plt.xlabel(r'$W$')
498            plt.ylabel(r'$\tau_\mathrm{int}$')
499            length = int(len(self.e_n_tauint[e_name]))
500            if self.tau_exp[e_name] > 0:
501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
502                x_help = np.arange(2 * self.tau_exp[e_name])
503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
510            else:
511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513
514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
516            plt.legend()
517            plt.xlim(-0.5, xmax)
518            ylim = plt.ylim()
519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
520            plt.draw()
521            if save:
522                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
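Usage sketch; as the source above shows, the ensemble index is appended to the file name when saving:

    my_obs.gamma_method()
    my_obs.plot_tauint()               # interactive window
    my_obs.plot_tauint(save='tauint')  # writes e.g. 'tauint_0'
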
def plot_rho(self, save=None):
524    def plot_rho(self, save=None):
525        """Plot the normalized autocorrelation function for each ensemble.
526
527        Parameters
528        ----------
529        save : str
530            saves the figure to a file named 'save' if specified.
531        """
532        if not hasattr(self, 'e_dvalue'):
533            raise Exception('Run the gamma method first.')
534        for e, e_name in enumerate(self.mc_names):
535            fig = plt.figure()
536            plt.xlabel('W')
537            plt.ylabel('rho')
538            length = int(len(self.e_drho[e_name]))
539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
541            if self.tau_exp[e_name] > 0:
542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
546            else:
547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
550            plt.xlim(-0.5, xmax)
551            plt.draw()
552            if save:
553                fig.savefig(save + "_" + str(e))

Plot the normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def plot_rep_dist(self):
555    def plot_rep_dist(self):
556        """Plot replica distribution for each ensemble with more than one replicum."""
557        if not hasattr(self, 'e_dvalue'):
558            raise Exception('Run the gamma method first.')
559        for e, e_name in enumerate(self.mc_names):
560            if len(self.e_content[e_name]) == 1:
561                print('No replica distribution for a single replicum (', e_name, ')')
562                continue
563            r_length = []
564            sub_r_mean = 0
565            for r, r_name in enumerate(self.e_content[e_name]):
566                r_length.append(len(self.deltas[r_name]))
567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
568            e_N = np.sum(r_length)
569            sub_r_mean /= e_N
570            arr = np.zeros(len(self.e_content[e_name]))
571            for r, r_name in enumerate(self.e_content[e_name]):
572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
574            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
575            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
577    def plot_history(self, expand=True):
578        """Plot derived Monte Carlo history for each ensemble.
579
580        Parameters
581        ----------
582        expand : bool
583            show expanded history for irregular Monte Carlo chains (default: True).
584        """
585        for e, e_name in enumerate(self.mc_names):
586            plt.figure()
587            r_length = []
588            tmp = []
589            tmp_expanded = []
590            for r, r_name in enumerate(self.e_content[e_name]):
591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
592                if expand:
593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
594                    r_length.append(len(tmp_expanded[-1]))
595                else:
596                    r_length.append(len(tmp[-1]))
597            e_N = np.sum(r_length)
598            x = np.arange(e_N)
599            y_test = np.concatenate(tmp, axis=0)
600            if expand:
601                y = np.concatenate(tmp_expanded, axis=0)
602            else:
603                y = y_test
604            plt.errorbar(x, y, fmt='.', markersize=3)
605            plt.xlim(-0.5, e_N - 0.5)
606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
607            plt.draw()

Plot derived Monte Carlo history for each ensemble.

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
609    def plot_piechart(self, save=None):
610        """Plot a pie chart which shows the fractional contribution of each
611        ensemble to the error and returns a dictionary containing the fractions.
612
613        Parameters
614        ----------
615        save : str
616            saves the figure to a file named 'save' if specified.
617        """
618        if not hasattr(self, 'e_dvalue'):
619            raise Exception('Run the gamma method first.')
620        if np.isclose(0.0, self._dvalue, atol=1e-15):
621            raise Exception('Error is 0.0')
622        labels = self.e_names
623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
624        fig1, ax1 = plt.subplots()
625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
626        ax1.axis('equal')
627        plt.draw()
628        if save:
629            fig1.savefig(save)
630
631        return dict(zip(self.e_names, sizes))

Plot a pie chart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
634        """Dump the Obs to a file 'filename' in the chosen format.
635
636        Parameters
637        ----------
638        filename : str
639            name of the file to be saved.
640        datatype : str
641            Format of the exported file. Supported formats include
642            "json.gz" and "pickle"
643        description : str
644            Description for output file, only relevant for json.gz format.
645        path : str
646            specifies a custom path for the file (default '.')
647        """
648        if 'path' in kwargs:
649            file_name = kwargs.get('path') + '/' + filename
650        else:
651            file_name = filename
652
653        if datatype == "json.gz":
654            from .input.json import dump_to_json
655            dump_to_json([self], file_name, description=description)
656        elif datatype == "pickle":
657            with open(file_name + '.p', 'wb') as fb:
658                pickle.dump(self, fb)
659        else:
660            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'filename' in the chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
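A round-trip sketch; reading back via pyerrors.input.json is assumed to mirror the json.gz writer used above:

    my_obs.dump('my_observable')                     # writes my_observable.json.gz
    my_obs.dump('my_observable', datatype='pickle')  # writes my_observable.p

    from pyerrors.input.json import load_json
    loaded = load_json('my_observable')              # reads my_observable.json.gz
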
def export_jackknife(self):
662    def export_jackknife(self):
663        """Export jackknife samples from the Obs
664
665        Returns
666        -------
667        numpy.ndarray
668            Returns a numpy array of length N + 1 where N is the number of samples
669            for the given ensemble and replicum. The zeroth entry of the array contains
670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
671            derived from the Obs. The current implementation only works for observables
672            defined on exactly one ensemble and replicum. The derived jackknife samples
673            should agree with samples from a full jackknife analysis up to O(1/N).
674        """
675
676        if len(self.names) != 1:
677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
678
679        name = self.names[0]
680        full_data = self.deltas[name] + self.r_values[name]
681        n = full_data.size
682        mean = self.value
683        tmp_jacks = np.zeros(n + 1)
684        tmp_jacks[0] = mean
685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
686        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
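The naive jackknife error reconstructed from these samples should reproduce the S=0 (no autocorrelation) error of the Obs; a sketch, assuming my_obs lives on a single ensemble and replicum:

    import numpy as np

    jacks = my_obs.export_jackknife()
    n = len(jacks) - 1
    jack_mean = np.mean(jacks[1:])
    jack_err = np.sqrt((n - 1) * np.mean((jacks[1:] - jack_mean) ** 2))

    my_obs.gamma_method(S=0)
    print(jack_err, my_obs.dvalue)    # the two estimates agree
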
The elementary functions sqrt, log, exp, sin, cos, tan, arcsin, arccos, arctan, sinh, cosh, tanh, arcsinh, arccosh and arctanh are available as methods on Obs; their one-line implementations are shown in the class source listing above.
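
Because these methods carry the numpy names, numpy's ufuncs dispatch to them when applied to an Obs; a minimal sketch, assuming my_obs from above:

    import numpy as np

    root = np.sqrt(my_obs)    # equivalent to my_obs.sqrt()
    angle = np.arctan(my_obs)
    root.gamma_method()
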
class CObs:
869class CObs:
870    """Class for a complex valued observable."""
871    __slots__ = ['_real', '_imag', 'tag']
872
873    def __init__(self, real, imag=0.0):
874        self._real = real
875        self._imag = imag
876        self.tag = None
877
878    @property
879    def real(self):
880        return self._real
881
882    @property
883    def imag(self):
884        return self._imag
885
886    def gamma_method(self, **kwargs):
887        """Executes the gamma_method for the real and the imaginary part."""
888        if isinstance(self.real, Obs):
889            self.real.gamma_method(**kwargs)
890        if isinstance(self.imag, Obs):
891            self.imag.gamma_method(**kwargs)
892
893    def is_zero(self):
894        """Checks whether both real and imaginary part are zero within machine precision."""
895        return self.real == 0.0 and self.imag == 0.0
896
897    def conjugate(self):
898        return CObs(self.real, -self.imag)
899
900    def __add__(self, other):
901        if isinstance(other, np.ndarray):
902            return other + self
903        elif hasattr(other, 'real') and hasattr(other, 'imag'):
904            return CObs(self.real + other.real,
905                        self.imag + other.imag)
906        else:
907            return CObs(self.real + other, self.imag)
908
909    def __radd__(self, y):
910        return self + y
911
912    def __sub__(self, other):
913        if isinstance(other, np.ndarray):
914            return -1 * (other - self)
915        elif hasattr(other, 'real') and hasattr(other, 'imag'):
916            return CObs(self.real - other.real, self.imag - other.imag)
917        else:
918            return CObs(self.real - other, self.imag)
919
920    def __rsub__(self, other):
921        return -1 * (self - other)
922
923    def __mul__(self, other):
924        if isinstance(other, np.ndarray):
925            return other * self
926        elif hasattr(other, 'real') and hasattr(other, 'imag'):
927            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
928                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
929                                               [self.real, other.real, self.imag, other.imag],
930                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
931                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
932                                               [self.real, other.real, self.imag, other.imag],
933                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
934            elif getattr(other, 'imag', 0) != 0:
935                return CObs(self.real * other.real - self.imag * other.imag,
936                            self.imag * other.real + self.real * other.imag)
937            else:
938                return CObs(self.real * other.real, self.imag * other.real)
939        else:
940            return CObs(self.real * other, self.imag * other)
941
942    def __rmul__(self, other):
943        return self * other
944
945    def __truediv__(self, other):
946        if isinstance(other, np.ndarray):
947            return 1 / (other / self)
948        elif hasattr(other, 'real') and hasattr(other, 'imag'):
949            r = other.real ** 2 + other.imag ** 2
950            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
951        else:
952            return CObs(self.real / other, self.imag / other)
953
954    def __rtruediv__(self, other):
955        r = self.real ** 2 + self.imag ** 2
956        if hasattr(other, 'real') and hasattr(other, 'imag'):
957            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
958        else:
959            return CObs(self.real * other / r, -self.imag * other / r)
960
961    def __abs__(self):
962        return np.sqrt(self.real**2 + self.imag**2)
963
964    def __pos__(self):
965        return self
966
967    def __neg__(self):
968        return -1 * self
969
970    def __eq__(self, other):
971        return self.real == other.real and self.imag == other.imag
972
973    def __str__(self):
974        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
975
976    def __repr__(self):
977        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
873    def __init__(self, real, imag=0.0):
874        self._real = real
875        self._imag = imag
876        self.tag = None
def gamma_method(self, **kwargs):
886    def gamma_method(self, **kwargs):
887        """Executes the gamma_method for the real and the imaginary part."""
888        if isinstance(self.real, Obs):
889            self.real.gamma_method(**kwargs)
890        if isinstance(self.imag, Obs):
891            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
893    def is_zero(self):
894        """Checks whether both real and imaginary part are zero within machine precision."""
895        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
897    def conjugate(self):
898        return CObs(self.real, -self.imag)
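
A usage sketch, assuming obs_re and obs_im are real-valued Obs defined on the same ensemble:

    import pyerrors as pe

    c_obs = pe.CObs(obs_re, obs_im)
    mod_sq = (c_obs * c_obs.conjugate()).real   # |c|^2 as a real Obs
    mod_sq.gamma_method()
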
def derived_observable(func, data, array_mode=False, **kwargs):
1099def derived_observable(func, data, array_mode=False, **kwargs):
1100    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1101
1102    Parameters
1103    ----------
1104    func : object
1105        arbitrary function of the form func(data, **kwargs). For the
1106        automatic differentiation to work, all numpy functions have to have
1107        the autograd wrapper (use 'import autograd.numpy as anp').
1108    data : list
1109        list of Obs, e.g. [obs1, obs2, obs3].
1110    num_grad : bool
1111        if True, numerical derivatives are used instead of autograd
1112        (default False). To control the numerical differentiation the
1113        kwargs of numdifftools.step_generators.MaxStepGenerator
1114        can be used.
1115    man_grad : list
1116        manually supply a list or an array which contains the jacobian
1117        of func. Use cautiously, supplying the wrong derivative will
1118        not be intercepted.
1119
1120    Notes
1121    -----
1122    For simple mathematical operations it can be practical to use anonymous
1123    functions. For the ratio of two observables one can e.g. use
1124
1125    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1126    """
1127
1128    data = np.asarray(data)
1129    raveled_data = data.ravel()
1130
1131    # Workaround for matrix operations containing non Obs data
1132    if not all(isinstance(x, Obs) for x in raveled_data):
1133        for i in range(len(raveled_data)):
1134            if isinstance(raveled_data[i], (int, float)):
1135                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1136
1137    allcov = {}
1138    for o in raveled_data:
1139        for name in o.cov_names:
1140            if name in allcov:
1141                if not np.allclose(allcov[name], o.covobs[name].cov):
1142                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1143            else:
1144                allcov[name] = o.covobs[name].cov
1145
1146    n_obs = len(raveled_data)
1147    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1148    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1149    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1150
1151    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1152
1153    if data.ndim == 1:
1154        values = np.array([o.value for o in data])
1155    else:
1156        values = np.vectorize(lambda x: x.value)(data)
1157
1158    new_values = func(values, **kwargs)
1159
1160    multi = int(isinstance(new_values, np.ndarray))
1161
1162    new_r_values = {}
1163    new_idl_d = {}
1164    for name in new_sample_names:
1165        idl = []
1166        tmp_values = np.zeros(n_obs)
1167        for i, item in enumerate(raveled_data):
1168            tmp_values[i] = item.r_values.get(name, item.value)
1169            tmp_idl = item.idl.get(name)
1170            if tmp_idl is not None:
1171                idl.append(tmp_idl)
1172        if multi > 0:
1173            tmp_values = np.array(tmp_values).reshape(data.shape)
1174        new_r_values[name] = func(tmp_values, **kwargs)
1175        new_idl_d[name] = _merge_idx(idl)
1176
1177    if 'man_grad' in kwargs:
1178        deriv = np.asarray(kwargs.get('man_grad'))
1179        if new_values.shape + data.shape != deriv.shape:
1180            raise Exception('Manual derivative does not have correct shape.')
1181    elif kwargs.get('num_grad') is True:
1182        if multi > 0:
1183            raise Exception('Multi mode currently not supported for numerical derivative')
1184        options = {
1185            'base_step': 0.1,
1186            'step_ratio': 2.5}
1187        for key in options.keys():
1188            kwarg = kwargs.get(key)
1189            if kwarg is not None:
1190                options[key] = kwarg
1191        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1192        if tmp_df.size == 1:
1193            deriv = np.array([tmp_df.real])
1194        else:
1195            deriv = tmp_df.real
1196    else:
1197        deriv = jacobian(func)(values, **kwargs)
1198
1199    final_result = np.zeros(new_values.shape, dtype=object)
1200
1201    if array_mode is True:
1202
1203        class _Zero_grad():
1204            def __init__(self, N):
1205                self.grad = np.zeros((N, 1))
1206
1207        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1208        d_extracted = {}
1209        g_extracted = {}
1210        for name in new_sample_names:
1211            d_extracted[name] = []
1212            ens_length = len(new_idl_d[name])
1213            for i_dat, dat in enumerate(data):
1214                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1215        for name in new_cov_names:
1216            g_extracted[name] = []
1217            zero_grad = _Zero_grad(new_covobs_lengths[name])
1218            for i_dat, dat in enumerate(data):
1219                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1220
1221    for i_val, new_val in np.ndenumerate(new_values):
1222        new_deltas = {}
1223        new_grad = {}
1224        if array_mode is True:
1225            for name in new_sample_names:
1226                ens_length = d_extracted[name][0].shape[-1]
1227                new_deltas[name] = np.zeros(ens_length)
1228                for i_dat, dat in enumerate(d_extracted[name]):
1229                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1230            for name in new_cov_names:
1231                new_grad[name] = 0
1232                for i_dat, dat in enumerate(g_extracted[name]):
1233                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1234        else:
1235            for j_obs, obs in np.ndenumerate(data):
1236                for name in obs.names:
1237                    if name in obs.cov_names:
1238                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1239                    else:
1240                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1241
1242        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1243
1244        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1245            raise Exception('The same name has been used for deltas and covobs!')
1246        new_samples = []
1247        new_means = []
1248        new_idl = []
1249        new_names_obs = []
1250        for name in new_names:
1251            if name not in new_covobs:
1252                new_samples.append(new_deltas[name])
1253                new_idl.append(new_idl_d[name])
1254                new_means.append(new_r_values[name][i_val])
1255                new_names_obs.append(name)
1256        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1257        for name in new_covobs:
1258            final_result[i_val].names.append(name)
1259        final_result[i_val]._covobs = new_covobs
1260        final_result[i_val]._value = new_val
1261        final_result[i_val].reweighted = reweighted
1262
1263    if multi == 0:
1264        final_result = final_result.item()
1265
1266    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously; supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can, e.g., use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
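
The numerical and manual modes follow the same call pattern. A minimal sketch, assuming two placeholder observables created with pseudo_Obs on a hypothetical ensemble 'ens1'; note that derived_observable forwards its keyword arguments to func, so func has to accept them:

import pyerrors as pe
import numpy as np

obs1 = pe.pseudo_Obs(2.0, 0.1, 'ens1')  # hypothetical test observables
obs2 = pe.pseudo_Obs(4.0, 0.2, 'ens1')

def ratio(x, **kwargs):  # **kwargs is required because derived_observable forwards its kwargs to func
    return x[0] / x[1]

# numerical differentiation via numdifftools instead of autograd
num_obs = pe.derived_observable(ratio, [obs1, obs2], num_grad=True, base_step=0.1)

# manually supplied jacobian of x[0] / x[1], evaluated at the central values
man = np.array([1 / obs2.value, -obs1.value / obs2.value ** 2])
man_obs = pe.derived_observable(ratio, [obs1, obs2], man_grad=man)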

def reweight(weight, obs, **kwargs):
1298def reweight(weight, obs, **kwargs):
1299    """Reweight a list of observables.
1300
1301    Parameters
1302    ----------
1303    weight : Obs
1304        Reweighting factor. An Observable that has to be defined on a superset of the
1305        configurations in obs[i].idl for all i.
1306    obs : list
1307        list of Obs, e.g. [obs1, obs2, obs3].
1308    all_configs : bool
1309        if True, the reweighted observables are normalized by the average of
1310        the reweighting factor on all configurations in weight.idl and not
1311        on the configurations in obs[i].idl. Default False.
1312    """
1313    result = []
1314    for i in range(len(obs)):
1315        if len(obs[i].cov_names):
1316            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1317        if not set(obs[i].names).issubset(weight.names):
1318            raise Exception('Error: Ensembles do not fit')
1319        for name in obs[i].names:
1320            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1321                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1322        new_samples = []
1323        w_deltas = {}
1324        for name in sorted(obs[i].names):
1325            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1326            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1327        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1328
1329        if kwargs.get('all_configs'):
1330            new_weight = weight
1331        else:
1332            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1333
1334        result.append(tmp_obs / new_weight)
1335        result[-1].reweighted = True
1336
1337    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
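
A minimal sketch, assuming a synthetic observable and an invented reweighting factor on a hypothetical ensemble 'ens1', both defined on the same configurations:

import pyerrors as pe
import numpy as np

rng = np.random.default_rng(7)
obs = pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1'])
weight = pe.Obs([np.exp(rng.normal(0.0, 0.05, 500))], ['ens1'])  # invented reweighting factor

reweighted = pe.reweight(weight, [obs])[0]  # reweight returns a list, one entry per input Obs
reweighted.gamma_method()
assert reweighted.reweighted is True
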
def correlate(obs_a, obs_b):
1340def correlate(obs_a, obs_b):
1341    """Correlate two observables.
1342
1343    Parameters
1344    ----------
1345    obs_a : Obs
1346        First observable
1347    obs_b : Obs
1348        Second observable
1349
1350    Notes
1351    -----
1352    Keep in mind to only correlate primary observables which have not been reweighted
1353    yet. The reweighting has to be applied after correlating the observables.
1354    Currently only works if ensembles are identical (this is not strictly necessary).
1355    """
1356
1357    if sorted(obs_a.names) != sorted(obs_b.names):
1358        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1359    if len(obs_a.cov_names) or len(obs_b.cov_names):
1360        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1361    for name in obs_a.names:
1362        if obs_a.shape[name] != obs_b.shape[name]:
1363            raise Exception(f'Shapes of ensemble {name} do not fit')
1364        if obs_a.idl[name] != obs_b.idl[name]:
1365            raise Exception(f'idl of ensemble {name} do not fit')
1366
1367    if obs_a.reweighted is True:
1368        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1369    if obs_b.reweighted is True:
1370        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1371
1372    new_samples = []
1373    new_idl = []
1374    for name in sorted(obs_a.names):
1375        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1376        new_idl.append(obs_a.idl[name])
1377
1378    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1379    o.reweighted = obs_a.reweighted or obs_b.reweighted
1380    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
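
A minimal sketch, assuming two synthetic observables that share a common fluctuation on a hypothetical ensemble 'ens1' (identical idl, as required):

import pyerrors as pe
import numpy as np

rng = np.random.default_rng(11)
common = rng.normal(0.0, 1.0, 1000)  # shared fluctuations on the same configurations
obs_a = pe.Obs([1.0 + 0.1 * common], ['ens1'])
obs_b = pe.Obs([2.0 - 0.1 * common], ['ens1'])

ab = pe.correlate(obs_a, obs_b)  # configuration-wise product of the two observables
ab.gamma_method()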

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1383def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1384    r'''Calculates the error covariance matrix of a set of observables.
1385
1386    WARNING: This function should be used with care, especially for observables with support on multiple
1387             ensembles with differing autocorrelations. See the notes below for details.
1388
1389    The gamma method has to be applied first to all observables.
1390
1391    Parameters
1392    ----------
1393    obs : list or numpy.ndarray
1394        List or one dimensional array of Obs
1395    visualize : bool
1396        If True plots the corresponding normalized correlation matrix (default False).
1397    correlation : bool
1398        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1399    smooth : None or int
1400        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1401        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1402        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
1403        small ones.
1404
1405    Notes
1406    -----
1407    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1408    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1409    in the absence of autocorrelation.
1410    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1411    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1412    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1413    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1414    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1415    '''
1416
1417    length = len(obs)
1418
1419    max_samples = np.max([o.N for o in obs])
1420    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1421        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1422
1423    cov = np.zeros((length, length))
1424    for i in range(length):
1425        for j in range(i, length):
1426            cov[i, j] = _covariance_element(obs[i], obs[j])
1427    cov = cov + cov.T - np.diag(np.diag(cov))
1428
1429    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1430
1431    if isinstance(smooth, int):
1432        corr = _smooth_eigenvalues(corr, smooth)
1433
1434    if visualize:
1435        plt.matshow(corr, vmin=-1, vmax=1)
1436        plt.set_cmap('RdBu')
1437        plt.colorbar()
1438        plt.draw()
1439
1440    if correlation is True:
1441        return corr
1442
1443    errors = [o.dvalue for o in obs]
1444    cov = np.diag(errors) @ corr @ np.diag(errors)
1445
1446    eigenvalues = np.linalg.eigh(cov)[0]
1447    if not np.all(eigenvalues >= 0):
1448        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1449
1450    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.
The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements:
$$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
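
A minimal sketch with invented data, two correlated observables on a hypothetical ensemble 'ens1'; the gamma method is applied before calling covariance, as required:

import pyerrors as pe
import numpy as np

rng = np.random.default_rng(3)
base = rng.normal(0.0, 1.0, 1000)  # common fluctuation that correlates the two observables
obs1 = pe.Obs([1.0 + 0.2 * base + 0.1 * rng.normal(size=1000)], ['ens1'])
obs2 = pe.Obs([0.5 - 0.1 * base + 0.1 * rng.normal(size=1000)], ['ens1'])

for o in (obs1, obs2):
    o.gamma_method()  # mandatory before covariance

cov = pe.covariance([obs1, obs2])                     # 2x2 error covariance matrix
corr = pe.covariance([obs1, obs2], correlation=True)  # normalized correlation matrix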

def import_jackknife(jacks, name, idl=None):
1530def import_jackknife(jacks, name, idl=None):
1531    """Imports jackknife samples and returns an Obs
1532
1533    Parameters
1534    ----------
1535    jacks : numpy.ndarray
1536        numpy array containing the mean value as zeroth entry and
1537        the N jackknife samples as first to Nth entry.
1538    name : str
1539        name of the ensemble the samples are defined on.
1540    """
1541    length = len(jacks) - 1
1542    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1543    samples = jacks[1:] @ prj
1544    mean = np.mean(samples)
1545    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1546    new_obs._value = jacks[0]
1547    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • idl (list, optional): list of ranges or lists on which the samples are defined.
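
A round-trip sketch, assuming the companion method Obs.export_jackknife and a pseudo observable on a hypothetical ensemble 'ens1':

import pyerrors as pe

obs = pe.pseudo_Obs(0.5, 0.02, 'ens1')  # invented observable
jacks = obs.export_jackknife()          # zeroth entry: mean, entries 1..N: jackknife samples
reconstructed = pe.import_jackknife(jacks, 'ens1')
reconstructed.gamma_method()
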
def merge_obs(list_of_obs):
1550def merge_obs(list_of_obs):
1551    """Combine all observables in list_of_obs into one new observable
1552
1553    Parameters
1554    ----------
1555    list_of_obs : list
1556        list of the Obs objects to be combined
1557
1558    Notes
1559    -----
1560    It is not possible to combine obs which are based on the same replicum
1561    """
1562    replist = [item for obs in list_of_obs for item in obs.names]
1563    if len(replist) != len(set(replist)):
1564        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1565    if any([len(o.cov_names) for o in list_of_obs]):
1566        raise Exception('Not possible to merge data that contains covobs!')
1567    new_dict = {}
1568    idl_dict = {}
1569    for o in list_of_obs:
1570        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1571                        for key in set(o.deltas) | set(o.r_values)})
1572        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1573
1574    names = sorted(new_dict.keys())
1575    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1576    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1577    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs objects to be combined
Notes

It is not possible to combine obs which are based on the same replicum
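
A minimal sketch, assuming two synthetic measurements of the same quantity on disjoint replica; the replica names 'ens1|r01' and 'ens1|r02' follow the usual ensemble|replicum naming convention:

import pyerrors as pe
import numpy as np

rng = np.random.default_rng(5)
obs_r1 = pe.Obs([rng.normal(1.0, 0.1, 400)], ['ens1|r01'])  # replicum 1
obs_r2 = pe.Obs([rng.normal(1.0, 0.1, 600)], ['ens1|r02'])  # replicum 2

merged = pe.merge_obs([obs_r1, obs_r2])  # one Obs carrying both replica
merged.gamma_method()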

def cov_Obs(means, cov, name, grad=None):
1580def cov_Obs(means, cov, name, grad=None):
1581    """Create an Obs based on mean(s) and a covariance matrix
1582
1583    Parameters
1584    ----------
1585    means : list of floats or float
1586        N mean value(s) of the new Obs
1587    cov : list or array
1588        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1589    name : str
1590        identifier for the covariance matrix
1591    grad : list or array
1592        Gradient of the Covobs wrt. the means belonging to cov.
1593    """
1594
1595    def covobs_to_obs(co):
1596        """Make an Obs out of a Covobs
1597
1598        Parameters
1599        ----------
1600        co : Covobs
1601            Covobs to be embedded into the Obs
1602        """
1603        o = Obs([], [], means=[])
1604        o._value = co.value
1605        o.names.append(co.name)
1606        o._covobs[co.name] = co
1607        o._dvalue = np.sqrt(co.errsq())
1608        return o
1609
1610    ol = []
1611    if isinstance(means, (float, int)):
1612        means = [means]
1613
1614    for i in range(len(means)):
1615        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1616    if ol[0].covobs[name].N != len(means):
1617        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1618    if len(ol) == 1:
1619        return ol[0]
1620    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
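
A minimal sketch with invented numbers: a single external input with a plain variance, and a pair of correlated inputs described by a 2x2 covariance matrix:

import pyerrors as pe

# single input; a 0d covariance is just the variance, i.e. an error of 0.05
Z = pe.cov_Obs(1.0, 0.05 ** 2, 'Z_factor')

# two correlated inputs from a 2x2 covariance matrix
cov = [[0.0004, 0.0001],
       [0.0001, 0.0009]]
a, b = pe.cov_Obs([1.71, 0.86], cov, 'matching')

for o in (Z, a, b):
    o.gamma_method()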