pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6from autograd import jacobian
   7import matplotlib.pyplot as plt
   8from scipy.stats import skew, skewtest, kurtosis, kurtosistest
   9import numdifftools as nd
  10from itertools import groupby
  11from .covobs import Covobs
  12
  13# Improve print output of numpy.ndarrays containing Obs objects.
  14np.set_printoptions(formatter={'object': lambda x: str(x)})
  15
  16
  17class Obs:
  18    """Class for a general observable.
  19
  20    Instances of Obs are the basic objects of a pyerrors error analysis.
  21    They are initialized with a list which contains arrays of samples for
  22    different ensembles/replica and another list of same length which contains
  23    the names of the ensembles/replica. Mathematical operations can be
  24    performed on instances. The result is another instance of Obs. The error of
  25    an instance can be computed with the gamma_method. Also contains additional
  26    methods for output and visualization of the error calculation.
  27
  28    Attributes
  29    ----------
  30    S_global : float
  31        Standard value for S (default 2.0)
  32    S_dict : dict
  33        Dictionary for S values. If an entry for a given ensemble
  34        exists this overwrites the standard value for that ensemble.
  35    tau_exp_global : float
  36        Standard value for tau_exp (default 0.0)
  37    tau_exp_dict : dict
  38        Dictionary for tau_exp values. If an entry for a given ensemble exists
  39        this overwrites the standard value for that ensemble.
  40    N_sigma_global : float
  41        Standard value for N_sigma (default 1.0)
  42    N_sigma_dict : dict
  43        Dictionary for N_sigma values. If an entry for a given ensemble exists
  44        this overwrites the standard value for that ensemble.
  45    """
  46    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  47                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  48                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  49                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  50                 'idl', 'tag', '_covobs', '__dict__']
  51
  52    S_global = 2.0
  53    S_dict = {}
  54    tau_exp_global = 0.0
  55    tau_exp_dict = {}
  56    N_sigma_global = 1.0
  57    N_sigma_dict = {}
  58
  59    def __init__(self, samples, names, idl=None, **kwargs):
  60        """ Initialize Obs object.
  61
  62        Parameters
  63        ----------
  64        samples : list
  65            list of numpy arrays containing the Monte Carlo samples
  66        names : list
  67            list of strings labeling the individual samples
  68        idl : list, optional
  69            list of ranges or lists on which the samples are defined
  70        """
  71
  72        if kwargs.get("means") is None and len(samples):
  73            if len(samples) != len(names):
  74                raise ValueError('Length of samples and names incompatible.')
  75            if idl is not None:
  76                if len(idl) != len(names):
  77                    raise ValueError('Length of idl incompatible with samples and names.')
  78            name_length = len(names)
  79            if name_length > 1:
  80                if name_length != len(set(names)):
  81                    raise ValueError('Names are not unique.')
  82                if not all(isinstance(x, str) for x in names):
  83                    raise TypeError('All names have to be strings.')
  84            else:
  85                if not isinstance(names[0], str):
  86                    raise TypeError('All names have to be strings.')
  87            if min(len(x) for x in samples) <= 4:
  88                raise ValueError('Samples have to have at least 5 entries.')
  89
  90        self.names = sorted(names)
  91        self.shape = {}
  92        self.r_values = {}
  93        self.deltas = {}
  94        self._covobs = {}
  95
  96        self._value = 0
  97        self.N = 0
  98        self.idl = {}
  99        if idl is not None:
 100            for name, idx in sorted(zip(names, idl)):
 101                if isinstance(idx, range):
 102                    self.idl[name] = idx
 103                elif isinstance(idx, (list, np.ndarray)):
 104                    dc = np.unique(np.diff(idx))
 105                    if np.any(dc < 0):
 106                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 107                    if len(dc) == 1:
 108                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 109                    else:
 110                        self.idl[name] = list(idx)
 111                else:
 112                    raise TypeError('incompatible type for idl[%s].' % (name))
 113        else:
 114            for name, sample in sorted(zip(names, samples)):
 115                self.idl[name] = range(1, len(sample) + 1)
 116
 117        if kwargs.get("means") is not None:
 118            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 119                self.shape[name] = len(self.idl[name])
 120                self.N += self.shape[name]
 121                self.r_values[name] = mean
 122                self.deltas[name] = sample
 123        else:
 124            for name, sample in sorted(zip(names, samples)):
 125                self.shape[name] = len(self.idl[name])
 126                self.N += self.shape[name]
 127                if len(sample) != self.shape[name]:
 128                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 129                self.r_values[name] = np.mean(sample)
 130                self.deltas[name] = sample - self.r_values[name]
 131                self._value += self.shape[name] * self.r_values[name]
 132            self._value /= self.N
 133
 134        self._dvalue = 0.0
 135        self.ddvalue = 0.0
 136        self.reweighted = False
 137
 138        self.tag = None
 139
 140    @property
 141    def value(self):
 142        return self._value
 143
 144    @property
 145    def dvalue(self):
 146        return self._dvalue
 147
 148    @property
 149    def e_names(self):
 150        return sorted(set([o.split('|')[0] for o in self.names]))
 151
 152    @property
 153    def cov_names(self):
 154        return sorted(set([o for o in self.covobs.keys()]))
 155
 156    @property
 157    def mc_names(self):
 158        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 159
 160    @property
 161    def e_content(self):
 162        res = {}
 163        for e, e_name in enumerate(self.e_names):
 164            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 165            if e_name in self.names:
 166                res[e_name].append(e_name)
 167        return res
 168
 169    @property
 170    def covobs(self):
 171        return self._covobs
 172
 173    def gamma_method(self, **kwargs):
 174        """Estimate the error and related properties of the Obs.
 175
 176        Parameters
 177        ----------
 178        S : float
 179            specifies a custom value for the parameter S (default 2.0).
 180            If set to 0 it is assumed that the data exhibits no
 181            autocorrelation. In this case the error estimates coincides
 182            with the sample standard error.
 183        tau_exp : float
 184            positive value triggers the critical slowing down analysis
 185            (default 0.0).
 186        N_sigma : float
 187            number of standard deviations from zero until the tail is
 188            attached to the autocorrelation function (default 1).
 189        fft : bool
 190            determines whether the fft algorithm is used for the computation
 191            of the autocorrelation function (default True)
 192        """
 193
 194        e_content = self.e_content
 195        self.e_dvalue = {}
 196        self.e_ddvalue = {}
 197        self.e_tauint = {}
 198        self.e_dtauint = {}
 199        self.e_windowsize = {}
 200        self.e_n_tauint = {}
 201        self.e_n_dtauint = {}
 202        e_gamma = {}
 203        self.e_rho = {}
 204        self.e_drho = {}
 205        self._dvalue = 0
 206        self.ddvalue = 0
 207
 208        self.S = {}
 209        self.tau_exp = {}
 210        self.N_sigma = {}
 211
 212        if kwargs.get('fft') is False:
 213            fft = False
 214        else:
 215            fft = True
 216
 217        def _parse_kwarg(kwarg_name):
 218            if kwarg_name in kwargs:
 219                tmp = kwargs.get(kwarg_name)
 220                if isinstance(tmp, (int, float)):
 221                    if tmp < 0:
 222                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 223                    for e, e_name in enumerate(self.e_names):
 224                        getattr(self, kwarg_name)[e_name] = tmp
 225                else:
 226                    raise TypeError(kwarg_name + ' is not in proper format.')
 227            else:
 228                for e, e_name in enumerate(self.e_names):
 229                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 230                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 231                    else:
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 233
 234        _parse_kwarg('S')
 235        _parse_kwarg('tau_exp')
 236        _parse_kwarg('N_sigma')
 237
 238        for e, e_name in enumerate(self.mc_names):
 239            gapsize = _determine_gap(self, e_content, e_name)
 240
 241            r_length = []
 242            for r_name in e_content[e_name]:
 243                if isinstance(self.idl[r_name], range):
 244                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 245                else:
 246                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 247
 248            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 249            w_max = max(r_length) // 2
 250            e_gamma[e_name] = np.zeros(w_max)
 251            self.e_rho[e_name] = np.zeros(w_max)
 252            self.e_drho[e_name] = np.zeros(w_max)
 253
 254            for r_name in e_content[e_name]:
 255                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 256
 257            gamma_div = np.zeros(w_max)
 258            for r_name in e_content[e_name]:
 259                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 260            gamma_div[gamma_div < 1] = 1.0
 261            e_gamma[e_name] /= gamma_div[:w_max]
 262
 263            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 264                self.e_tauint[e_name] = 0.5
 265                self.e_dtauint[e_name] = 0.0
 266                self.e_dvalue[e_name] = 0.0
 267                self.e_ddvalue[e_name] = 0.0
 268                self.e_windowsize[e_name] = 0
 269                continue
 270
 271            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 272            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 273            # Make sure no entry of tauint is smaller than 0.5
 274            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 275            # hep-lat/0306017 eq. (42)
 276            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 277            self.e_n_dtauint[e_name][0] = 0.0
 278
 279            def _compute_drho(i):
 280                tmp = (self.e_rho[e_name][i + 1:w_max]
 281                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
 282                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 283                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 284                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 285
 286            if self.tau_exp[e_name] > 0:
 287                _compute_drho(1)
 288                texp = self.tau_exp[e_name]
 289                # Critical slowing down analysis
 290                if w_max // 2 <= 1:
 291                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 292                for n in range(1, w_max // 2):
 293                    _compute_drho(n + 1)
 294                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 295                        # Bias correction hep-lat/0306017 eq. (49) included
 296                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 297                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 298                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 299                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 300                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 301                        self.e_windowsize[e_name] = n
 302                        break
 303            else:
 304                if self.S[e_name] == 0.0:
 305                    self.e_tauint[e_name] = 0.5
 306                    self.e_dtauint[e_name] = 0.0
 307                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 308                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 309                    self.e_windowsize[e_name] = 0
 310                else:
 311                    # Standard automatic windowing procedure
 312                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 313                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 314                    for n in range(1, w_max):
 315                        if g_w[n - 1] < 0 or n >= w_max - 1:
 316                            _compute_drho(n)
 317                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 318                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 319                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 320                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 321                            self.e_windowsize[e_name] = n
 322                            break
 323
 324            self._dvalue += self.e_dvalue[e_name] ** 2
 325            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 326
 327        for e_name in self.cov_names:
 328            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 329            self.e_ddvalue[e_name] = 0
 330            self._dvalue += self.e_dvalue[e_name]**2
 331
 332        self._dvalue = np.sqrt(self._dvalue)
 333        if self._dvalue == 0.0:
 334            self.ddvalue = 0.0
 335        else:
 336            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 337        return
 338
 339    gm = gamma_method
 340
 341    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 342        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 343           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 344
 345        Parameters
 346        ----------
 347        deltas : list
 348            List of fluctuations
 349        idx : list
 350            List or range of configurations on which the deltas are defined.
 351        shape : int
 352            Number of configurations in idx.
 353        w_max : int
 354            Upper bound for the summation window.
 355        fft : bool
 356            determines whether the fft algorithm is used for the computation
 357            of the autocorrelation function.
 358        gapsize : int
 359            The target distance between two configurations. If longer distances
 360            are found in idx, the data is expanded.
 361        """
 362        gamma = np.zeros(w_max)
 363        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 364        new_shape = len(deltas)
 365        if fft:
 366            max_gamma = min(new_shape, w_max)
 367            # The padding for the fft has to be even
 368            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 369            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 370        else:
 371            for n in range(w_max):
 372                if new_shape - n >= 0:
 373                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 374
 375        return gamma
 376
 377    def details(self, ens_content=True):
 378        """Output detailed properties of the Obs.
 379
 380        Parameters
 381        ----------
 382        ens_content : bool
 383            print details about the ensembles and replica if true.
 384        """
 385        if self.tag is not None:
 386            print("Description:", self.tag)
 387        if not hasattr(self, 'e_dvalue'):
 388            print('Result\t %3.8e' % (self.value))
 389        else:
 390            if self.value == 0.0:
 391                percentage = np.nan
 392            else:
 393                percentage = np.abs(self._dvalue / self.value) * 100
 394            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 395            if len(self.e_names) > 1:
 396                print(' Ensemble errors:')
 397            e_content = self.e_content
 398            for e_name in self.mc_names:
 399                gap = _determine_gap(self, e_content, e_name)
 400
 401                if len(self.e_names) > 1:
 402                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 403                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 404                tau_string += f" in units of {gap} config"
 405                if gap > 1:
 406                    tau_string += "s"
 407                if self.tau_exp[e_name] > 0:
 408                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 409                else:
 410                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 411                print(tau_string)
 412            for e_name in self.cov_names:
 413                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 414        if ens_content is True:
 415            if len(self.e_names) == 1:
 416                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 417            else:
 418                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 419            my_string_list = []
 420            for key, value in sorted(self.e_content.items()):
 421                if key not in self.covobs:
 422                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 423                    if len(value) == 1:
 424                        my_string += f': {self.shape[value[0]]} configurations'
 425                        if isinstance(self.idl[value[0]], range):
 426                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 427                        else:
 428                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 429                    else:
 430                        sublist = []
 431                        for v in value:
 432                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 433                            my_substring += f': {self.shape[v]} configurations'
 434                            if isinstance(self.idl[v], range):
 435                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 436                            else:
 437                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 438                            sublist.append(my_substring)
 439
 440                        my_string += '\n' + '\n'.join(sublist)
 441                else:
 442                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 443                my_string_list.append(my_string)
 444            print('\n'.join(my_string_list))
 445
 446    def reweight(self, weight):
 447        """Reweight the obs with given rewighting factors.
 448
 449        Parameters
 450        ----------
 451        weight : Obs
 452            Reweighting factor. An Observable that has to be defined on a superset of the
 453            configurations in obs[i].idl for all i.
 454        all_configs : bool
 455            if True, the reweighted observables are normalized by the average of
 456            the reweighting factor on all configurations in weight.idl and not
 457            on the configurations in obs[i].idl. Default False.
 458        """
 459        return reweight(weight, [self])[0]
 460
 461    def is_zero_within_error(self, sigma=1):
 462        """Checks whether the observable is zero within 'sigma' standard errors.
 463
 464        Parameters
 465        ----------
 466        sigma : int
 467            Number of standard errors used for the check.
 468
 469        Works only properly when the gamma method was run.
 470        """
 471        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 472
 473    def is_zero(self, atol=1e-10):
 474        """Checks whether the observable is zero within a given tolerance.
 475
 476        Parameters
 477        ----------
 478        atol : float
 479            Absolute tolerance (for details see numpy documentation).
 480        """
 481        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 482
 483    def plot_tauint(self, save=None):
 484        """Plot integrated autocorrelation time for each ensemble.
 485
 486        Parameters
 487        ----------
 488        save : str
 489            saves the figure to a file named 'save' if.
 490        """
 491        if not hasattr(self, 'e_dvalue'):
 492            raise Exception('Run the gamma method first.')
 493
 494        for e, e_name in enumerate(self.mc_names):
 495            fig = plt.figure()
 496            plt.xlabel(r'$W$')
 497            plt.ylabel(r'$\tau_\mathrm{int}$')
 498            length = int(len(self.e_n_tauint[e_name]))
 499            if self.tau_exp[e_name] > 0:
 500                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 501                x_help = np.arange(2 * self.tau_exp[e_name])
 502                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 503                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 504                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 505                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 506                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 507                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 508                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 509            else:
 510                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 511                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 512
 513            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 514            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 515            plt.legend()
 516            plt.xlim(-0.5, xmax)
 517            ylim = plt.ylim()
 518            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 519            plt.draw()
 520            if save:
 521                fig.savefig(save + "_" + str(e))
 522
 523    def plot_rho(self, save=None):
 524        """Plot normalized autocorrelation function time for each ensemble.
 525
 526        Parameters
 527        ----------
 528        save : str
 529            saves the figure to a file named 'save' if.
 530        """
 531        if not hasattr(self, 'e_dvalue'):
 532            raise Exception('Run the gamma method first.')
 533        for e, e_name in enumerate(self.mc_names):
 534            fig = plt.figure()
 535            plt.xlabel('W')
 536            plt.ylabel('rho')
 537            length = int(len(self.e_drho[e_name]))
 538            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 539            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 540            if self.tau_exp[e_name] > 0:
 541                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 542                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 543                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 544                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 545            else:
 546                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 547                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 548            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 549            plt.xlim(-0.5, xmax)
 550            plt.draw()
 551            if save:
 552                fig.savefig(save + "_" + str(e))
 553
 554    def plot_rep_dist(self):
 555        """Plot replica distribution for each ensemble with more than one replicum."""
 556        if not hasattr(self, 'e_dvalue'):
 557            raise Exception('Run the gamma method first.')
 558        for e, e_name in enumerate(self.mc_names):
 559            if len(self.e_content[e_name]) == 1:
 560                print('No replica distribution for a single replicum (', e_name, ')')
 561                continue
 562            r_length = []
 563            sub_r_mean = 0
 564            for r, r_name in enumerate(self.e_content[e_name]):
 565                r_length.append(len(self.deltas[r_name]))
 566                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 567            e_N = np.sum(r_length)
 568            sub_r_mean /= e_N
 569            arr = np.zeros(len(self.e_content[e_name]))
 570            for r, r_name in enumerate(self.e_content[e_name]):
 571                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 572            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 573            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 574            plt.draw()
 575
 576    def plot_history(self, expand=True):
 577        """Plot derived Monte Carlo history for each ensemble
 578
 579        Parameters
 580        ----------
 581        expand : bool
 582            show expanded history for irregular Monte Carlo chains (default: True).
 583        """
 584        for e, e_name in enumerate(self.mc_names):
 585            plt.figure()
 586            r_length = []
 587            tmp = []
 588            tmp_expanded = []
 589            for r, r_name in enumerate(self.e_content[e_name]):
 590                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 591                if expand:
 592                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 593                    r_length.append(len(tmp_expanded[-1]))
 594                else:
 595                    r_length.append(len(tmp[-1]))
 596            e_N = np.sum(r_length)
 597            x = np.arange(e_N)
 598            y_test = np.concatenate(tmp, axis=0)
 599            if expand:
 600                y = np.concatenate(tmp_expanded, axis=0)
 601            else:
 602                y = y_test
 603            plt.errorbar(x, y, fmt='.', markersize=3)
 604            plt.xlim(-0.5, e_N - 0.5)
 605            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 606            plt.draw()
 607
 608    def plot_piechart(self, save=None):
 609        """Plot piechart which shows the fractional contribution of each
 610        ensemble to the error and returns a dictionary containing the fractions.
 611
 612        Parameters
 613        ----------
 614        save : str
 615            saves the figure to a file named 'save' if.
 616        """
 617        if not hasattr(self, 'e_dvalue'):
 618            raise Exception('Run the gamma method first.')
 619        if np.isclose(0.0, self._dvalue, atol=1e-15):
 620            raise Exception('Error is 0.0')
 621        labels = self.e_names
 622        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 623        fig1, ax1 = plt.subplots()
 624        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 625        ax1.axis('equal')
 626        plt.draw()
 627        if save:
 628            fig1.savefig(save)
 629
 630        return dict(zip(self.e_names, sizes))
 631
 632    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 633        """Dump the Obs to a file 'name' of chosen format.
 634
 635        Parameters
 636        ----------
 637        filename : str
 638            name of the file to be saved.
 639        datatype : str
 640            Format of the exported file. Supported formats include
 641            "json.gz" and "pickle"
 642        description : str
 643            Description for output file, only relevant for json.gz format.
 644        path : str
 645            specifies a custom path for the file (default '.')
 646        """
 647        if 'path' in kwargs:
 648            file_name = kwargs.get('path') + '/' + filename
 649        else:
 650            file_name = filename
 651
 652        if datatype == "json.gz":
 653            from .input.json import dump_to_json
 654            dump_to_json([self], file_name, description=description)
 655        elif datatype == "pickle":
 656            with open(file_name + '.p', 'wb') as fb:
 657                pickle.dump(self, fb)
 658        else:
 659            raise Exception("Unknown datatype " + str(datatype))
 660
 661    def export_jackknife(self):
 662        """Export jackknife samples from the Obs
 663
 664        Returns
 665        -------
 666        numpy.ndarray
 667            Returns a numpy array of length N + 1 where N is the number of samples
 668            for the given ensemble and replicum. The zeroth entry of the array contains
 669            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 670            derived from the Obs. The current implementation only works for observables
 671            defined on exactly one ensemble and replicum. The derived jackknife samples
 672            should agree with samples from a full jackknife analysis up to O(1/N).
 673        """
 674
 675        if len(self.names) != 1:
 676            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 677
 678        name = self.names[0]
 679        full_data = self.deltas[name] + self.r_values[name]
 680        n = full_data.size
 681        mean = self.value
 682        tmp_jacks = np.zeros(n + 1)
 683        tmp_jacks[0] = mean
 684        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 685        return tmp_jacks
 686
 687    def __float__(self):
 688        return float(self.value)
 689
 690    def __repr__(self):
 691        return 'Obs[' + str(self) + ']'
 692
 693    def __str__(self):
 694        return _format_uncertainty(self.value, self._dvalue)
 695
 696    def __format__(self, format_type):
 697        if format_type == "":
 698            significance = 2
 699        else:
 700            significance = int(float(format_type.replace("+", "").replace("-", "")))
 701        my_str = _format_uncertainty(self.value, self._dvalue,
 702                                     significance=significance)
 703        for char in ["+", " "]:
 704            if format_type.startswith(char):
 705                if my_str[0] != "-":
 706                    my_str = char + my_str
 707        return my_str
 708
 709    def __hash__(self):
 710        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 711        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 712        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 713        hash_tuple += tuple([o.encode() for o in self.names])
 714        m = hashlib.md5()
 715        [m.update(o) for o in hash_tuple]
 716        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 717
 718    # Overload comparisons
 719    def __lt__(self, other):
 720        return self.value < other
 721
 722    def __le__(self, other):
 723        return self.value <= other
 724
 725    def __gt__(self, other):
 726        return self.value > other
 727
 728    def __ge__(self, other):
 729        return self.value >= other
 730
 731    def __eq__(self, other):
 732        return (self - other).is_zero()
 733
 734    def __ne__(self, other):
 735        return not (self - other).is_zero()
 736
 737    # Overload math operations
 738    def __add__(self, y):
 739        if isinstance(y, Obs):
 740            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 741        else:
 742            if isinstance(y, np.ndarray):
 743                return np.array([self + o for o in y])
 744            elif y.__class__.__name__ in ['Corr', 'CObs']:
 745                return NotImplemented
 746            else:
 747                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 748
 749    def __radd__(self, y):
 750        return self + y
 751
 752    def __mul__(self, y):
 753        if isinstance(y, Obs):
 754            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 755        else:
 756            if isinstance(y, np.ndarray):
 757                return np.array([self * o for o in y])
 758            elif isinstance(y, complex):
 759                return CObs(self * y.real, self * y.imag)
 760            elif y.__class__.__name__ in ['Corr', 'CObs']:
 761                return NotImplemented
 762            else:
 763                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 764
 765    def __rmul__(self, y):
 766        return self * y
 767
 768    def __sub__(self, y):
 769        if isinstance(y, Obs):
 770            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 771        else:
 772            if isinstance(y, np.ndarray):
 773                return np.array([self - o for o in y])
 774            elif y.__class__.__name__ in ['Corr', 'CObs']:
 775                return NotImplemented
 776            else:
 777                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 778
 779    def __rsub__(self, y):
 780        return -1 * (self - y)
 781
 782    def __pos__(self):
 783        return self
 784
 785    def __neg__(self):
 786        return -1 * self
 787
 788    def __truediv__(self, y):
 789        if isinstance(y, Obs):
 790            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 791        else:
 792            if isinstance(y, np.ndarray):
 793                return np.array([self / o for o in y])
 794            elif y.__class__.__name__ in ['Corr', 'CObs']:
 795                return NotImplemented
 796            else:
 797                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 798
 799    def __rtruediv__(self, y):
 800        if isinstance(y, Obs):
 801            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 802        else:
 803            if isinstance(y, np.ndarray):
 804                return np.array([o / self for o in y])
 805            elif y.__class__.__name__ in ['Corr', 'CObs']:
 806                return NotImplemented
 807            else:
 808                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 809
 810    def __pow__(self, y):
 811        if isinstance(y, Obs):
 812            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 813        else:
 814            return derived_observable(lambda x: x[0] ** y, [self])
 815
 816    def __rpow__(self, y):
 817        if isinstance(y, Obs):
 818            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 819        else:
 820            return derived_observable(lambda x: y ** x[0], [self])
 821
 822    def __abs__(self):
 823        return derived_observable(lambda x: anp.abs(x[0]), [self])
 824
 825    # Overload numpy functions
 826    def sqrt(self):
 827        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 828
 829    def log(self):
 830        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 831
 832    def exp(self):
 833        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 834
 835    def sin(self):
 836        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 837
 838    def cos(self):
 839        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 840
 841    def tan(self):
 842        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 843
 844    def arcsin(self):
 845        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 846
 847    def arccos(self):
 848        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 849
 850    def arctan(self):
 851        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 852
 853    def sinh(self):
 854        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 855
 856    def cosh(self):
 857        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 858
 859    def tanh(self):
 860        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 861
 862    def arcsinh(self):
 863        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 864
 865    def arccosh(self):
 866        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 867
 868    def arctanh(self):
 869        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 870
 871
 872class CObs:
 873    """Class for a complex valued observable."""
 874    __slots__ = ['_real', '_imag', 'tag']
 875
 876    def __init__(self, real, imag=0.0):
 877        self._real = real
 878        self._imag = imag
 879        self.tag = None
 880
 881    @property
 882    def real(self):
 883        return self._real
 884
 885    @property
 886    def imag(self):
 887        return self._imag
 888
 889    def gamma_method(self, **kwargs):
 890        """Executes the gamma_method for the real and the imaginary part."""
 891        if isinstance(self.real, Obs):
 892            self.real.gamma_method(**kwargs)
 893        if isinstance(self.imag, Obs):
 894            self.imag.gamma_method(**kwargs)
 895
 896    def is_zero(self):
 897        """Checks whether both real and imaginary part are zero within machine precision."""
 898        return self.real == 0.0 and self.imag == 0.0
 899
 900    def conjugate(self):
 901        return CObs(self.real, -self.imag)
 902
 903    def __add__(self, other):
 904        if isinstance(other, np.ndarray):
 905            return other + self
 906        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 907            return CObs(self.real + other.real,
 908                        self.imag + other.imag)
 909        else:
 910            return CObs(self.real + other, self.imag)
 911
 912    def __radd__(self, y):
 913        return self + y
 914
 915    def __sub__(self, other):
 916        if isinstance(other, np.ndarray):
 917            return -1 * (other - self)
 918        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 919            return CObs(self.real - other.real, self.imag - other.imag)
 920        else:
 921            return CObs(self.real - other, self.imag)
 922
 923    def __rsub__(self, other):
 924        return -1 * (self - other)
 925
 926    def __mul__(self, other):
 927        if isinstance(other, np.ndarray):
 928            return other * self
 929        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 930            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 931                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 932                                               [self.real, other.real, self.imag, other.imag],
 933                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 934                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 935                                               [self.real, other.real, self.imag, other.imag],
 936                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 937            elif getattr(other, 'imag', 0) != 0:
 938                return CObs(self.real * other.real - self.imag * other.imag,
 939                            self.imag * other.real + self.real * other.imag)
 940            else:
 941                return CObs(self.real * other.real, self.imag * other.real)
 942        else:
 943            return CObs(self.real * other, self.imag * other)
 944
 945    def __rmul__(self, other):
 946        return self * other
 947
 948    def __truediv__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return 1 / (other / self)
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            r = other.real ** 2 + other.imag ** 2
 953            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 954        else:
 955            return CObs(self.real / other, self.imag / other)
 956
 957    def __rtruediv__(self, other):
 958        r = self.real ** 2 + self.imag ** 2
 959        if hasattr(other, 'real') and hasattr(other, 'imag'):
 960            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 961        else:
 962            return CObs(self.real * other / r, -self.imag * other / r)
 963
 964    def __abs__(self):
 965        return np.sqrt(self.real**2 + self.imag**2)
 966
 967    def __pos__(self):
 968        return self
 969
 970    def __neg__(self):
 971        return -1 * self
 972
 973    def __eq__(self, other):
 974        return self.real == other.real and self.imag == other.imag
 975
 976    def __str__(self):
 977        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 978
 979    def __repr__(self):
 980        return 'CObs[' + str(self) + ']'
 981
 982
 983def _format_uncertainty(value, dvalue, significance=2):
 984    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 985    if dvalue == 0.0 or (not np.isfinite(dvalue)):
 986        return str(value)
 987    if not isinstance(significance, int):
 988        raise TypeError("significance needs to be an integer.")
 989    if significance < 1:
 990        raise ValueError("significance needs to be larger than zero.")
 991    fexp = np.floor(np.log10(dvalue))
 992    if fexp < 0.0:
 993        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
 994    elif fexp == 0.0:
 995        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
 996    else:
 997        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
 998
 999
1000def _expand_deltas(deltas, idx, shape, gapsize):
1001    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1002       configurations and where holes are filled by 0.
1003       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1004
1005    Parameters
1006    ----------
1007    deltas : list
1008        List of fluctuations
1009    idx : list
1010        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1011    shape : int
1012        Number of configs in idx.
1013    gapsize : int
1014        The target distance between two configurations. If longer distances
1015        are found in idx, the data is expanded.
1016    """
1017    if isinstance(idx, range):
1018        if (idx.step == gapsize):
1019            return deltas
1020    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1021    for i in range(shape):
1022        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1023    return ret
1024
1025
1026def _merge_idx(idl):
1027    """Returns the union of all lists in idl as range or sorted list
1028
1029    Parameters
1030    ----------
1031    idl : list
1032        List of lists or ranges.
1033    """
1034
1035    if _check_lists_equal(idl):
1036        return idl[0]
1037
1038    idunion = sorted(set().union(*idl))
1039
1040    # Check whether idunion can be expressed as range
1041    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
1042    idtest = [list(idrange), idunion]
1043    if _check_lists_equal(idtest):
1044        return idrange
1045
1046    return idunion
1047
1048
1049def _intersection_idx(idl):
1050    """Returns the intersection of all lists in idl as range or sorted list
1051
1052    Parameters
1053    ----------
1054    idl : list
1055        List of lists or ranges.
1056    """
1057
1058    if _check_lists_equal(idl):
1059        return idl[0]
1060
1061    idinter = sorted(set.intersection(*[set(o) for o in idl]))
1062
1063    # Check whether idinter can be expressed as range
1064    try:
1065        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
1066        idtest = [list(idrange), idinter]
1067        if _check_lists_equal(idtest):
1068            return idrange
1069    except IndexError:
1070        pass
1071
1072    return idinter
1073
1074
1075def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1076    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1077       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1078       common divisor of the step sizes is used as new step size.
1079
1080    Parameters
1081    ----------
1082    deltas : list
1083        List of fluctuations
1084    idx : list
1085        List or range of configs on which the deltas are defined.
1086        Has to be a subset of new_idx and has to be sorted in ascending order.
1087    shape : list
1088        Number of configs in idx.
1089    new_idx : list
1090        List of configs that defines the new range, has to be sorted in ascending order.
1091    """
1092
1093    if type(idx) is range and type(new_idx) is range:
1094        if idx == new_idx:
1095            return deltas
1096    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1097    for i in range(shape):
1098        ret[idx[i] - new_idx[0]] = deltas[i]
1099    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1100
1101
1102def derived_observable(func, data, array_mode=False, **kwargs):
1103    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1104
1105    Parameters
1106    ----------
1107    func : object
1108        arbitrary function of the form func(data, **kwargs). For the
1109        automatic differentiation to work, all numpy functions have to have
1110        the autograd wrapper (use 'import autograd.numpy as anp').
1111    data : list
1112        list of Obs, e.g. [obs1, obs2, obs3].
1113    num_grad : bool
1114        if True, numerical derivatives are used instead of autograd
1115        (default False). To control the numerical differentiation the
1116        kwargs of numdifftools.step_generators.MaxStepGenerator
1117        can be used.
1118    man_grad : list
1119        manually supply a list or an array which contains the jacobian
1120        of func. Use cautiously, supplying the wrong derivative will
1121        not be intercepted.
1122
1123    Notes
1124    -----
1125    For simple mathematical operations it can be practical to use anonymous
1126    functions. For the ratio of two observables one can e.g. use
1127
1128    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1129    """
1130
1131    data = np.asarray(data)
1132    raveled_data = data.ravel()
1133
1134    # Workaround for matrix operations containing non Obs data
1135    if not all(isinstance(x, Obs) for x in raveled_data):
1136        for i in range(len(raveled_data)):
1137            if isinstance(raveled_data[i], (int, float)):
1138                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1139
1140    allcov = {}
1141    for o in raveled_data:
1142        for name in o.cov_names:
1143            if name in allcov:
1144                if not np.allclose(allcov[name], o.covobs[name].cov):
1145                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1146            else:
1147                allcov[name] = o.covobs[name].cov
1148
1149    n_obs = len(raveled_data)
1150    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1151    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1152    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1153
1154    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1155
1156    if data.ndim == 1:
1157        values = np.array([o.value for o in data])
1158    else:
1159        values = np.vectorize(lambda x: x.value)(data)
1160
1161    new_values = func(values, **kwargs)
1162
1163    multi = int(isinstance(new_values, np.ndarray))
1164
1165    new_r_values = {}
1166    new_idl_d = {}
1167    for name in new_sample_names:
1168        idl = []
1169        tmp_values = np.zeros(n_obs)
1170        for i, item in enumerate(raveled_data):
1171            tmp_values[i] = item.r_values.get(name, item.value)
1172            tmp_idl = item.idl.get(name)
1173            if tmp_idl is not None:
1174                idl.append(tmp_idl)
1175        if multi > 0:
1176            tmp_values = np.array(tmp_values).reshape(data.shape)
1177        new_r_values[name] = func(tmp_values, **kwargs)
1178        new_idl_d[name] = _merge_idx(idl)
1179
1180    if 'man_grad' in kwargs:
1181        deriv = np.asarray(kwargs.get('man_grad'))
1182        if new_values.shape + data.shape != deriv.shape:
1183            raise Exception('Manual derivative does not have correct shape.')
1184    elif kwargs.get('num_grad') is True:
1185        if multi > 0:
1186            raise Exception('Multi mode currently not supported for numerical derivative')
1187        options = {
1188            'base_step': 0.1,
1189            'step_ratio': 2.5}
1190        for key in options.keys():
1191            kwarg = kwargs.get(key)
1192            if kwarg is not None:
1193                options[key] = kwarg
1194        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1195        if tmp_df.size == 1:
1196            deriv = np.array([tmp_df.real])
1197        else:
1198            deriv = tmp_df.real
1199    else:
1200        deriv = jacobian(func)(values, **kwargs)
1201
1202    final_result = np.zeros(new_values.shape, dtype=object)
1203
1204    if array_mode is True:
1205
1206        class _Zero_grad():
1207            def __init__(self, N):
1208                self.grad = np.zeros((N, 1))
1209
1210        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1211        d_extracted = {}
1212        g_extracted = {}
1213        for name in new_sample_names:
1214            d_extracted[name] = []
1215            ens_length = len(new_idl_d[name])
1216            for i_dat, dat in enumerate(data):
1217                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1218        for name in new_cov_names:
1219            g_extracted[name] = []
1220            zero_grad = _Zero_grad(new_covobs_lengths[name])
1221            for i_dat, dat in enumerate(data):
1222                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1223
1224    for i_val, new_val in np.ndenumerate(new_values):
1225        new_deltas = {}
1226        new_grad = {}
1227        if array_mode is True:
1228            for name in new_sample_names:
1229                ens_length = d_extracted[name][0].shape[-1]
1230                new_deltas[name] = np.zeros(ens_length)
1231                for i_dat, dat in enumerate(d_extracted[name]):
1232                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1233            for name in new_cov_names:
1234                new_grad[name] = 0
1235                for i_dat, dat in enumerate(g_extracted[name]):
1236                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1237        else:
1238            for j_obs, obs in np.ndenumerate(data):
1239                for name in obs.names:
1240                    if name in obs.cov_names:
1241                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1242                    else:
1243                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1244
1245        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1246
1247        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1248            raise Exception('The same name has been used for deltas and covobs!')
1249        new_samples = []
1250        new_means = []
1251        new_idl = []
1252        new_names_obs = []
1253        for name in new_names:
1254            if name not in new_covobs:
1255                new_samples.append(new_deltas[name])
1256                new_idl.append(new_idl_d[name])
1257                new_means.append(new_r_values[name][i_val])
1258                new_names_obs.append(name)
1259        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1260        for name in new_covobs:
1261            final_result[i_val].names.append(name)
1262        final_result[i_val]._covobs = new_covobs
1263        final_result[i_val]._value = new_val
1264        final_result[i_val].reweighted = reweighted
1265
1266    if multi == 0:
1267        final_result = final_result.item()
1268
1269    return final_result
1270
1271
1272def _reduce_deltas(deltas, idx_old, idx_new):
1273    """Extract deltas defined on idx_old on all configs of idx_new.
1274
1275    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1276    are ordered in an ascending order.
1277
1278    Parameters
1279    ----------
1280    deltas : list
1281        List of fluctuations
1282    idx_old : list
1283        List or range of configs on which the deltas are defined
1284    idx_new : list
1285        List of configs for which we want to extract the deltas.
1286        Has to be a subset of idx_old.
1287    """
1288    if not len(deltas) == len(idx_old):
1289        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1290    if type(idx_old) is range and type(idx_new) is range:
1291        if idx_old == idx_new:
1292            return deltas
1293    if _check_lists_equal([idx_old, idx_new]):
1294        return deltas
1295    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1296    if len(indices) < len(idx_new):
1297        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1298    return np.array(deltas)[indices]
1299
1300
1301def reweight(weight, obs, **kwargs):
1302    """Reweight a list of observables.
1303
1304    Parameters
1305    ----------
1306    weight : Obs
1307        Reweighting factor. An Observable that has to be defined on a superset of the
1308        configurations in obs[i].idl for all i.
1309    obs : list
1310        list of Obs, e.g. [obs1, obs2, obs3].
1311    all_configs : bool
1312        if True, the reweighted observables are normalized by the average of
1313        the reweighting factor on all configurations in weight.idl and not
1314        on the configurations in obs[i].idl. Default False.
1315    """
1316    result = []
1317    for i in range(len(obs)):
1318        if len(obs[i].cov_names):
1319            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1320        if not set(obs[i].names).issubset(weight.names):
1321            raise Exception('Error: Ensembles do not fit')
1322        for name in obs[i].names:
1323            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1324                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1325        new_samples = []
1326        w_deltas = {}
1327        for name in sorted(obs[i].names):
1328            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1329            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1330        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1331
1332        if kwargs.get('all_configs'):
1333            new_weight = weight
1334        else:
1335            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1336
1337        result.append(tmp_obs / new_weight)
1338        result[-1].reweighted = True
1339
1340    return result
1341
1342
1343def correlate(obs_a, obs_b):
1344    """Correlate two observables.
1345
1346    Parameters
1347    ----------
1348    obs_a : Obs
1349        First observable
1350    obs_b : Obs
1351        Second observable
1352
1353    Notes
1354    -----
1355    Keep in mind to only correlate primary observables which have not been reweighted
1356    yet. The reweighting has to be applied after correlating the observables.
1357    Currently only works if ensembles are identical (this is not strictly necessary).
1358    """
1359
1360    if sorted(obs_a.names) != sorted(obs_b.names):
1361        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1362    if len(obs_a.cov_names) or len(obs_b.cov_names):
1363        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1364    for name in obs_a.names:
1365        if obs_a.shape[name] != obs_b.shape[name]:
1366            raise Exception('Shapes of ensemble', name, 'do not fit')
1367        if obs_a.idl[name] != obs_b.idl[name]:
1368            raise Exception('idl of ensemble', name, 'do not fit')
1369
1370    if obs_a.reweighted is True:
1371        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1372    if obs_b.reweighted is True:
1373        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1374
1375    new_samples = []
1376    new_idl = []
1377    for name in sorted(obs_a.names):
1378        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1379        new_idl.append(obs_a.idl[name])
1380
1381    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1382    o.reweighted = obs_a.reweighted or obs_b.reweighted
1383    return o
1384
1385
1386def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1387    r'''Calculates the error covariance matrix of a set of observables.
1388
1389    WARNING: This function should be used with care, especially for observables with support on multiple
1390             ensembles with differing autocorrelations. See the notes below for details.
1391
1392    The gamma method has to be applied first to all observables.
1393
1394    Parameters
1395    ----------
1396    obs : list or numpy.ndarray
1397        List or one dimensional array of Obs
1398    visualize : bool
1399        If True plots the corresponding normalized correlation matrix (default False).
1400    correlation : bool
1401        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1402    smooth : None or int
1403        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1404        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1405        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1406        small ones.
1407
1408    Notes
1409    -----
1410    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1411    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1412    in the absence of autocorrelation.
1413    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1414    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1415    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1416    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1417    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1418    '''
1419
1420    length = len(obs)
1421
1422    max_samples = np.max([o.N for o in obs])
1423    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1424        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1425
1426    cov = np.zeros((length, length))
1427    for i in range(length):
1428        for j in range(i, length):
1429            cov[i, j] = _covariance_element(obs[i], obs[j])
1430    cov = cov + cov.T - np.diag(np.diag(cov))
1431
1432    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1433
1434    if isinstance(smooth, int):
1435        corr = _smooth_eigenvalues(corr, smooth)
1436
1437    if visualize:
1438        plt.matshow(corr, vmin=-1, vmax=1)
1439        plt.set_cmap('RdBu')
1440        plt.colorbar()
1441        plt.draw()
1442
1443    if correlation is True:
1444        return corr
1445
1446    errors = [o.dvalue for o in obs]
1447    cov = np.diag(errors) @ corr @ np.diag(errors)
1448
1449    eigenvalues = np.linalg.eigh(cov)[0]
1450    if not np.all(eigenvalues >= 0):
1451        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1452
1453    return cov
1454
1455
1456def _smooth_eigenvalues(corr, E):
1457    """Eigenvalue smoothing as described in hep-lat/9412087
1458
1459    corr : np.ndarray
1460        correlation matrix
1461    E : integer
1462        Number of eigenvalues to be left substantially unchanged
1463    """
1464    if not (2 < E < corr.shape[0] - 1):
1465        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1466    vals, vec = np.linalg.eigh(corr)
1467    lambda_min = np.mean(vals[:-E])
1468    vals[vals < lambda_min] = lambda_min
1469    vals /= np.mean(vals)
1470    return vec @ np.diag(vals) @ vec.T
1471
1472
1473def _covariance_element(obs1, obs2):
1474    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1475
1476    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1477        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1478        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1479        return np.sum(deltas1 * deltas2)
1480
1481    if set(obs1.names).isdisjoint(set(obs2.names)):
1482        return 0.0
1483
1484    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1485        raise Exception('The gamma method has to be applied to both Obs first.')
1486
1487    dvalue = 0.0
1488
1489    for e_name in obs1.mc_names:
1490
1491        if e_name not in obs2.mc_names:
1492            continue
1493
1494        idl_d = {}
1495        for r_name in obs1.e_content[e_name]:
1496            if r_name not in obs2.e_content[e_name]:
1497                continue
1498            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1499
1500        gamma = 0.0
1501
1502        for r_name in obs1.e_content[e_name]:
1503            if r_name not in obs2.e_content[e_name]:
1504                continue
1505            if len(idl_d[r_name]) == 0:
1506                continue
1507            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1508
1509        if gamma == 0.0:
1510            continue
1511
1512        gamma_div = 0.0
1513        for r_name in obs1.e_content[e_name]:
1514            if r_name not in obs2.e_content[e_name]:
1515                continue
1516            if len(idl_d[r_name]) == 0:
1517                continue
1518            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1519        gamma /= gamma_div
1520
1521        dvalue += gamma
1522
1523    for e_name in obs1.cov_names:
1524
1525        if e_name not in obs2.cov_names:
1526            continue
1527
1528        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1529
1530    return dvalue
1531
1532
1533def import_jackknife(jacks, name, idl=None):
1534    """Imports jackknife samples and returns an Obs
1535
1536    Parameters
1537    ----------
1538    jacks : numpy.ndarray
1539        numpy array containing the mean value as zeroth entry and
1540        the N jackknife samples as first to Nth entry.
1541    name : str
1542        name of the ensemble the samples are defined on.
1543    """
1544    length = len(jacks) - 1
1545    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1546    samples = jacks[1:] @ prj
1547    mean = np.mean(samples)
1548    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1549    new_obs._value = jacks[0]
1550    return new_obs
1551
1552
1553def merge_obs(list_of_obs):
1554    """Combine all observables in list_of_obs into one new observable
1555
1556    Parameters
1557    ----------
1558    list_of_obs : list
1559        list of the Obs object to be combined
1560
1561    Notes
1562    -----
1563    It is not possible to combine obs which are based on the same replicum
1564    """
1565    replist = [item for obs in list_of_obs for item in obs.names]
1566    if (len(replist) == len(set(replist))) is False:
1567        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1568    if any([len(o.cov_names) for o in list_of_obs]):
1569        raise Exception('Not possible to merge data that contains covobs!')
1570    new_dict = {}
1571    idl_dict = {}
1572    for o in list_of_obs:
1573        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1574                        for key in set(o.deltas) | set(o.r_values)})
1575        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1576
1577    names = sorted(new_dict.keys())
1578    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1579    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1580    return o
1581
1582
1583def cov_Obs(means, cov, name, grad=None):
1584    """Create an Obs based on mean(s) and a covariance matrix
1585
1586    Parameters
1587    ----------
1588    mean : list of floats or float
1589        N mean value(s) of the new Obs
1590    cov : list or array
1591        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1592    name : str
1593        identifier for the covariance matrix
1594    grad : list or array
1595        Gradient of the Covobs wrt. the means belonging to cov.
1596    """
1597
1598    def covobs_to_obs(co):
1599        """Make an Obs out of a Covobs
1600
1601        Parameters
1602        ----------
1603        co : Covobs
1604            Covobs to be embedded into the Obs
1605        """
1606        o = Obs([], [], means=[])
1607        o._value = co.value
1608        o.names.append(co.name)
1609        o._covobs[co.name] = co
1610        o._dvalue = np.sqrt(co.errsq())
1611        return o
1612
1613    ol = []
1614    if isinstance(means, (float, int)):
1615        means = [means]
1616
1617    for i in range(len(means)):
1618        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1619    if ol[0].covobs[name].N != len(means):
1620        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1621    if len(ol) == 1:
1622        return ol[0]
1623    return ol
1624
1625
1626def _determine_gap(o, e_content, e_name):
1627    gaps = []
1628    for r_name in e_content[e_name]:
1629        if isinstance(o.idl[r_name], range):
1630            gaps.append(o.idl[r_name].step)
1631        else:
1632            gaps.append(np.min(np.diff(o.idl[r_name])))
1633
1634    gap = min(gaps)
1635    if not np.all([gi % gap == 0 for gi in gaps]):
1636        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1637
1638    return gap
1639
1640
1641def _check_lists_equal(idl):
1642    '''
1643    Use groupby to efficiently check whether all elements of idl are identical.
1644    Returns True if all elements are equal, otherwise False.
1645
1646    Parameters
1647    ----------
1648    idl : list of lists, ranges or np.ndarrays
1649    '''
1650    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1651    if next(g, True) and not next(g, False):
1652        return True
1653    return False
class Obs:
 18class Obs:
 19    """Class for a general observable.
 20
 21    Instances of Obs are the basic objects of a pyerrors error analysis.
 22    They are initialized with a list which contains arrays of samples for
 23    different ensembles/replica and another list of same length which contains
 24    the names of the ensembles/replica. Mathematical operations can be
 25    performed on instances. The result is another instance of Obs. The error of
 26    an instance can be computed with the gamma_method. Also contains additional
 27    methods for output and visualization of the error calculation.
 28
 29    Attributes
 30    ----------
 31    S_global : float
 32        Standard value for S (default 2.0)
 33    S_dict : dict
 34        Dictionary for S values. If an entry for a given ensemble
 35        exists this overwrites the standard value for that ensemble.
 36    tau_exp_global : float
 37        Standard value for tau_exp (default 0.0)
 38    tau_exp_dict : dict
 39        Dictionary for tau_exp values. If an entry for a given ensemble exists
 40        this overwrites the standard value for that ensemble.
 41    N_sigma_global : float
 42        Standard value for N_sigma (default 1.0)
 43    N_sigma_dict : dict
 44        Dictionary for N_sigma values. If an entry for a given ensemble exists
 45        this overwrites the standard value for that ensemble.
 46    """
 47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 51                 'idl', 'tag', '_covobs', '__dict__']
 52
 53    S_global = 2.0
 54    S_dict = {}
 55    tau_exp_global = 0.0
 56    tau_exp_dict = {}
 57    N_sigma_global = 1.0
 58    N_sigma_dict = {}
 59
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise ValueError('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise ValueError('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise ValueError('Names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise ValueError('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise TypeError('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None
140
141    @property
142    def value(self):
143        return self._value
144
145    @property
146    def dvalue(self):
147        return self._dvalue
148
149    @property
150    def e_names(self):
151        return sorted(set([o.split('|')[0] for o in self.names]))
152
153    @property
154    def cov_names(self):
155        return sorted(set([o for o in self.covobs.keys()]))
156
157    @property
158    def mc_names(self):
159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
160
161    @property
162    def e_content(self):
163        res = {}
164        for e, e_name in enumerate(self.e_names):
165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
166            if e_name in self.names:
167                res[e_name].append(e_name)
168        return res
169
170    @property
171    def covobs(self):
172        return self._covobs
173
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            gapsize = _determine_gap(self, e_content, e_name)
241
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
274            # Make sure no entry of tauint is smaller than 0.5
275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
276            # hep-lat/0306017 eq. (42)
277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
278            self.e_n_dtauint[e_name][0] = 0.0
279
280            def _compute_drho(i):
281                tmp = (self.e_rho[e_name][i + 1:w_max]
282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
286
287            if self.tau_exp[e_name] > 0:
288                _compute_drho(1)
289                texp = self.tau_exp[e_name]
290                # Critical slowing down analysis
291                if w_max // 2 <= 1:
292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
293                for n in range(1, w_max // 2):
294                    _compute_drho(n + 1)
295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
296                        # Bias correction hep-lat/0306017 eq. (49) included
297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
302                        self.e_windowsize[e_name] = n
303                        break
304            else:
305                if self.S[e_name] == 0.0:
306                    self.e_tauint[e_name] = 0.5
307                    self.e_dtauint[e_name] = 0.0
308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
310                    self.e_windowsize[e_name] = 0
311                else:
312                    # Standard automatic windowing procedure
313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
315                    for n in range(1, w_max):
316                        if g_w[n - 1] < 0 or n >= w_max - 1:
317                            _compute_drho(n)
318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
322                            self.e_windowsize[e_name] = n
323                            break
324
325            self._dvalue += self.e_dvalue[e_name] ** 2
326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
327
328        for e_name in self.cov_names:
329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
330            self.e_ddvalue[e_name] = 0
331            self._dvalue += self.e_dvalue[e_name]**2
332
333        self._dvalue = np.sqrt(self._dvalue)
334        if self._dvalue == 0.0:
335            self.ddvalue = 0.0
336        else:
337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
338        return
339
340    gm = gamma_method
341
342    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
343        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
344           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
345
346        Parameters
347        ----------
348        deltas : list
349            List of fluctuations
350        idx : list
351            List or range of configurations on which the deltas are defined.
352        shape : int
353            Number of configurations in idx.
354        w_max : int
355            Upper bound for the summation window.
356        fft : bool
357            determines whether the fft algorithm is used for the computation
358            of the autocorrelation function.
359        gapsize : int
360            The target distance between two configurations. If longer distances
361            are found in idx, the data is expanded.
362        """
363        gamma = np.zeros(w_max)
364        deltas = _expand_deltas(deltas, idx, shape, gapsize)
365        new_shape = len(deltas)
366        if fft:
367            max_gamma = min(new_shape, w_max)
368            # The padding for the fft has to be even
369            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
370            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
371        else:
372            for n in range(w_max):
373                if new_shape - n >= 0:
374                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
375
376        return gamma
377
378    def details(self, ens_content=True):
379        """Output detailed properties of the Obs.
380
381        Parameters
382        ----------
383        ens_content : bool
384            print details about the ensembles and replica if true.
385        """
386        if self.tag is not None:
387            print("Description:", self.tag)
388        if not hasattr(self, 'e_dvalue'):
389            print('Result\t %3.8e' % (self.value))
390        else:
391            if self.value == 0.0:
392                percentage = np.nan
393            else:
394                percentage = np.abs(self._dvalue / self.value) * 100
395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
396            if len(self.e_names) > 1:
397                print(' Ensemble errors:')
398            e_content = self.e_content
399            for e_name in self.mc_names:
400                gap = _determine_gap(self, e_content, e_name)
401
402                if len(self.e_names) > 1:
403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
405                tau_string += f" in units of {gap} config"
406                if gap > 1:
407                    tau_string += "s"
408                if self.tau_exp[e_name] > 0:
409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
410                else:
411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
412                print(tau_string)
413            for e_name in self.cov_names:
414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
415        if ens_content is True:
416            if len(self.e_names) == 1:
417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
418            else:
419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
420            my_string_list = []
421            for key, value in sorted(self.e_content.items()):
422                if key not in self.covobs:
423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
424                    if len(value) == 1:
425                        my_string += f': {self.shape[value[0]]} configurations'
426                        if isinstance(self.idl[value[0]], range):
427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
428                        else:
429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
430                    else:
431                        sublist = []
432                        for v in value:
433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
434                            my_substring += f': {self.shape[v]} configurations'
435                            if isinstance(self.idl[v], range):
436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
437                            else:
438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
439                            sublist.append(my_substring)
440
441                        my_string += '\n' + '\n'.join(sublist)
442                else:
443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
444                my_string_list.append(my_string)
445            print('\n'.join(my_string_list))
446
447    def reweight(self, weight):
448        """Reweight the obs with given rewighting factors.
449
450        Parameters
451        ----------
452        weight : Obs
453            Reweighting factor. An Observable that has to be defined on a superset of the
454            configurations in obs[i].idl for all i.
455        all_configs : bool
456            if True, the reweighted observables are normalized by the average of
457            the reweighting factor on all configurations in weight.idl and not
458            on the configurations in obs[i].idl. Default False.
459        """
460        return reweight(weight, [self])[0]
461
462    def is_zero_within_error(self, sigma=1):
463        """Checks whether the observable is zero within 'sigma' standard errors.
464
465        Parameters
466        ----------
467        sigma : int
468            Number of standard errors used for the check.
469
470        Works only properly when the gamma method was run.
471        """
472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
473
474    def is_zero(self, atol=1e-10):
475        """Checks whether the observable is zero within a given tolerance.
476
477        Parameters
478        ----------
479        atol : float
480            Absolute tolerance (for details see numpy documentation).
481        """
482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
483
484    def plot_tauint(self, save=None):
485        """Plot integrated autocorrelation time for each ensemble.
486
487        Parameters
488        ----------
489        save : str
490            saves the figure to a file named 'save' if.
491        """
492        if not hasattr(self, 'e_dvalue'):
493            raise Exception('Run the gamma method first.')
494
495        for e, e_name in enumerate(self.mc_names):
496            fig = plt.figure()
497            plt.xlabel(r'$W$')
498            plt.ylabel(r'$\tau_\mathrm{int}$')
499            length = int(len(self.e_n_tauint[e_name]))
500            if self.tau_exp[e_name] > 0:
501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
502                x_help = np.arange(2 * self.tau_exp[e_name])
503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
510            else:
511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513
514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
516            plt.legend()
517            plt.xlim(-0.5, xmax)
518            ylim = plt.ylim()
519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
520            plt.draw()
521            if save:
522                fig.savefig(save + "_" + str(e))
523
524    def plot_rho(self, save=None):
525        """Plot normalized autocorrelation function time for each ensemble.
526
527        Parameters
528        ----------
529        save : str
530            saves the figure to a file named 'save' if.
531        """
532        if not hasattr(self, 'e_dvalue'):
533            raise Exception('Run the gamma method first.')
534        for e, e_name in enumerate(self.mc_names):
535            fig = plt.figure()
536            plt.xlabel('W')
537            plt.ylabel('rho')
538            length = int(len(self.e_drho[e_name]))
539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
541            if self.tau_exp[e_name] > 0:
542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
546            else:
547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
550            plt.xlim(-0.5, xmax)
551            plt.draw()
552            if save:
553                fig.savefig(save + "_" + str(e))
554
555    def plot_rep_dist(self):
556        """Plot replica distribution for each ensemble with more than one replicum."""
557        if not hasattr(self, 'e_dvalue'):
558            raise Exception('Run the gamma method first.')
559        for e, e_name in enumerate(self.mc_names):
560            if len(self.e_content[e_name]) == 1:
561                print('No replica distribution for a single replicum (', e_name, ')')
562                continue
563            r_length = []
564            sub_r_mean = 0
565            for r, r_name in enumerate(self.e_content[e_name]):
566                r_length.append(len(self.deltas[r_name]))
567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
568            e_N = np.sum(r_length)
569            sub_r_mean /= e_N
570            arr = np.zeros(len(self.e_content[e_name]))
571            for r, r_name in enumerate(self.e_content[e_name]):
572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
575            plt.draw()
576
577    def plot_history(self, expand=True):
578        """Plot derived Monte Carlo history for each ensemble
579
580        Parameters
581        ----------
582        expand : bool
583            show expanded history for irregular Monte Carlo chains (default: True).
584        """
585        for e, e_name in enumerate(self.mc_names):
586            plt.figure()
587            r_length = []
588            tmp = []
589            tmp_expanded = []
590            for r, r_name in enumerate(self.e_content[e_name]):
591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
592                if expand:
593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
594                    r_length.append(len(tmp_expanded[-1]))
595                else:
596                    r_length.append(len(tmp[-1]))
597            e_N = np.sum(r_length)
598            x = np.arange(e_N)
599            y_test = np.concatenate(tmp, axis=0)
600            if expand:
601                y = np.concatenate(tmp_expanded, axis=0)
602            else:
603                y = y_test
604            plt.errorbar(x, y, fmt='.', markersize=3)
605            plt.xlim(-0.5, e_N - 0.5)
606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
607            plt.draw()
608
609    def plot_piechart(self, save=None):
610        """Plot piechart which shows the fractional contribution of each
611        ensemble to the error and returns a dictionary containing the fractions.
612
613        Parameters
614        ----------
615        save : str
616            saves the figure to a file named 'save' if.
617        """
618        if not hasattr(self, 'e_dvalue'):
619            raise Exception('Run the gamma method first.')
620        if np.isclose(0.0, self._dvalue, atol=1e-15):
621            raise Exception('Error is 0.0')
622        labels = self.e_names
623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
624        fig1, ax1 = plt.subplots()
625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
626        ax1.axis('equal')
627        plt.draw()
628        if save:
629            fig1.savefig(save)
630
631        return dict(zip(self.e_names, sizes))
632
633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
634        """Dump the Obs to a file 'name' of chosen format.
635
636        Parameters
637        ----------
638        filename : str
639            name of the file to be saved.
640        datatype : str
641            Format of the exported file. Supported formats include
642            "json.gz" and "pickle"
643        description : str
644            Description for output file, only relevant for json.gz format.
645        path : str
646            specifies a custom path for the file (default '.')
647        """
648        if 'path' in kwargs:
649            file_name = kwargs.get('path') + '/' + filename
650        else:
651            file_name = filename
652
653        if datatype == "json.gz":
654            from .input.json import dump_to_json
655            dump_to_json([self], file_name, description=description)
656        elif datatype == "pickle":
657            with open(file_name + '.p', 'wb') as fb:
658                pickle.dump(self, fb)
659        else:
660            raise Exception("Unknown datatype " + str(datatype))
661
662    def export_jackknife(self):
663        """Export jackknife samples from the Obs
664
665        Returns
666        -------
667        numpy.ndarray
668            Returns a numpy array of length N + 1 where N is the number of samples
669            for the given ensemble and replicum. The zeroth entry of the array contains
670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
671            derived from the Obs. The current implementation only works for observables
672            defined on exactly one ensemble and replicum. The derived jackknife samples
673            should agree with samples from a full jackknife analysis up to O(1/N).
674        """
675
676        if len(self.names) != 1:
677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
678
679        name = self.names[0]
680        full_data = self.deltas[name] + self.r_values[name]
681        n = full_data.size
682        mean = self.value
683        tmp_jacks = np.zeros(n + 1)
684        tmp_jacks[0] = mean
685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
686        return tmp_jacks
687
688    def __float__(self):
689        return float(self.value)
690
691    def __repr__(self):
692        return 'Obs[' + str(self) + ']'
693
694    def __str__(self):
695        return _format_uncertainty(self.value, self._dvalue)
696
697    def __format__(self, format_type):
698        if format_type == "":
699            significance = 2
700        else:
701            significance = int(float(format_type.replace("+", "").replace("-", "")))
702        my_str = _format_uncertainty(self.value, self._dvalue,
703                                     significance=significance)
704        for char in ["+", " "]:
705            if format_type.startswith(char):
706                if my_str[0] != "-":
707                    my_str = char + my_str
708        return my_str
709
710    def __hash__(self):
711        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
712        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
713        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
714        hash_tuple += tuple([o.encode() for o in self.names])
715        m = hashlib.md5()
716        [m.update(o) for o in hash_tuple]
717        return int(m.hexdigest(), 16) & 0xFFFFFFFF
718
719    # Overload comparisons
720    def __lt__(self, other):
721        return self.value < other
722
723    def __le__(self, other):
724        return self.value <= other
725
726    def __gt__(self, other):
727        return self.value > other
728
729    def __ge__(self, other):
730        return self.value >= other
731
732    def __eq__(self, other):
733        return (self - other).is_zero()
734
735    def __ne__(self, other):
736        return not (self - other).is_zero()
737
738    # Overload math operations
739    def __add__(self, y):
740        if isinstance(y, Obs):
741            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
742        else:
743            if isinstance(y, np.ndarray):
744                return np.array([self + o for o in y])
745            elif y.__class__.__name__ in ['Corr', 'CObs']:
746                return NotImplemented
747            else:
748                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
749
750    def __radd__(self, y):
751        return self + y
752
753    def __mul__(self, y):
754        if isinstance(y, Obs):
755            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
756        else:
757            if isinstance(y, np.ndarray):
758                return np.array([self * o for o in y])
759            elif isinstance(y, complex):
760                return CObs(self * y.real, self * y.imag)
761            elif y.__class__.__name__ in ['Corr', 'CObs']:
762                return NotImplemented
763            else:
764                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
765
766    def __rmul__(self, y):
767        return self * y
768
769    def __sub__(self, y):
770        if isinstance(y, Obs):
771            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
772        else:
773            if isinstance(y, np.ndarray):
774                return np.array([self - o for o in y])
775            elif y.__class__.__name__ in ['Corr', 'CObs']:
776                return NotImplemented
777            else:
778                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
779
780    def __rsub__(self, y):
781        return -1 * (self - y)
782
783    def __pos__(self):
784        return self
785
786    def __neg__(self):
787        return -1 * self
788
789    def __truediv__(self, y):
790        if isinstance(y, Obs):
791            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
792        else:
793            if isinstance(y, np.ndarray):
794                return np.array([self / o for o in y])
795            elif y.__class__.__name__ in ['Corr', 'CObs']:
796                return NotImplemented
797            else:
798                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
799
800    def __rtruediv__(self, y):
801        if isinstance(y, Obs):
802            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
803        else:
804            if isinstance(y, np.ndarray):
805                return np.array([o / self for o in y])
806            elif y.__class__.__name__ in ['Corr', 'CObs']:
807                return NotImplemented
808            else:
809                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
810
811    def __pow__(self, y):
812        if isinstance(y, Obs):
813            return derived_observable(lambda x: x[0] ** x[1], [self, y])
814        else:
815            return derived_observable(lambda x: x[0] ** y, [self])
816
817    def __rpow__(self, y):
818        if isinstance(y, Obs):
819            return derived_observable(lambda x: x[0] ** x[1], [y, self])
820        else:
821            return derived_observable(lambda x: y ** x[0], [self])
822
823    def __abs__(self):
824        return derived_observable(lambda x: anp.abs(x[0]), [self])
825
826    # Overload numpy functions
827    def sqrt(self):
828        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
829
830    def log(self):
831        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
832
833    def exp(self):
834        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
835
836    def sin(self):
837        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
838
839    def cos(self):
840        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
841
842    def tan(self):
843        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
844
845    def arcsin(self):
846        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
847
848    def arccos(self):
849        return derived_observable(lambda x: anp.arccos(x[0]), [self])
850
851    def arctan(self):
852        return derived_observable(lambda x: anp.arctan(x[0]), [self])
853
854    def sinh(self):
855        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
856
857    def cosh(self):
858        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
859
860    def tanh(self):
861        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
862
863    def arcsinh(self):
864        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
865
866    def arccosh(self):
867        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
868
869    def arctanh(self):
870        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise ValueError('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise ValueError('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise ValueError('Names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise ValueError('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise TypeError('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            gapsize = _determine_gap(self, e_content, e_name)
241
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
274            # Make sure no entry of tauint is smaller than 0.5
275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
276            # hep-lat/0306017 eq. (42)
277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
278            self.e_n_dtauint[e_name][0] = 0.0
279
280            def _compute_drho(i):
281                tmp = (self.e_rho[e_name][i + 1:w_max]
282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
286
287            if self.tau_exp[e_name] > 0:
288                _compute_drho(1)
289                texp = self.tau_exp[e_name]
290                # Critical slowing down analysis
291                if w_max // 2 <= 1:
292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
293                for n in range(1, w_max // 2):
294                    _compute_drho(n + 1)
295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
296                        # Bias correction hep-lat/0306017 eq. (49) included
297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
302                        self.e_windowsize[e_name] = n
303                        break
304            else:
305                if self.S[e_name] == 0.0:
306                    self.e_tauint[e_name] = 0.5
307                    self.e_dtauint[e_name] = 0.0
308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
310                    self.e_windowsize[e_name] = 0
311                else:
312                    # Standard automatic windowing procedure
313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
315                    for n in range(1, w_max):
316                        if g_w[n - 1] < 0 or n >= w_max - 1:
317                            _compute_drho(n)
318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
322                            self.e_windowsize[e_name] = n
323                            break
324
325            self._dvalue += self.e_dvalue[e_name] ** 2
326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
327
328        for e_name in self.cov_names:
329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
330            self.e_ddvalue[e_name] = 0
331            self._dvalue += self.e_dvalue[e_name]**2
332
333        self._dvalue = np.sqrt(self._dvalue)
334        if self._dvalue == 0.0:
335            self.ddvalue = 0.0
336        else:
337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
338        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            gapsize = _determine_gap(self, e_content, e_name)
241
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
273            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
274            # Make sure no entry of tauint is smaller than 0.5
275            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
276            # hep-lat/0306017 eq. (42)
277            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
278            self.e_n_dtauint[e_name][0] = 0.0
279
280            def _compute_drho(i):
281                tmp = (self.e_rho[e_name][i + 1:w_max]
282                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
283                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
284                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
285                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
286
287            if self.tau_exp[e_name] > 0:
288                _compute_drho(1)
289                texp = self.tau_exp[e_name]
290                # Critical slowing down analysis
291                if w_max // 2 <= 1:
292                    raise Exception("Need at least 8 samples for tau_exp error analysis")
293                for n in range(1, w_max // 2):
294                    _compute_drho(n + 1)
295                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
296                        # Bias correction hep-lat/0306017 eq. (49) included
297                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
298                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
299                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
300                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
301                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
302                        self.e_windowsize[e_name] = n
303                        break
304            else:
305                if self.S[e_name] == 0.0:
306                    self.e_tauint[e_name] = 0.5
307                    self.e_dtauint[e_name] = 0.0
308                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
309                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
310                    self.e_windowsize[e_name] = 0
311                else:
312                    # Standard automatic windowing procedure
313                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
314                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
315                    for n in range(1, w_max):
316                        if g_w[n - 1] < 0 or n >= w_max - 1:
317                            _compute_drho(n)
318                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
319                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
320                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
321                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
322                            self.e_windowsize[e_name] = n
323                            break
324
325            self._dvalue += self.e_dvalue[e_name] ** 2
326            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
327
328        for e_name in self.cov_names:
329            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
330            self.e_ddvalue[e_name] = 0
331            self._dvalue += self.e_dvalue[e_name]**2
332
333        self._dvalue = np.sqrt(self._dvalue)
334        if self._dvalue == 0.0:
335            self.ddvalue = 0.0
336        else:
337            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
338        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
378    def details(self, ens_content=True):
379        """Output detailed properties of the Obs.
380
381        Parameters
382        ----------
383        ens_content : bool
384            print details about the ensembles and replica if true.
385        """
386        if self.tag is not None:
387            print("Description:", self.tag)
388        if not hasattr(self, 'e_dvalue'):
389            print('Result\t %3.8e' % (self.value))
390        else:
391            if self.value == 0.0:
392                percentage = np.nan
393            else:
394                percentage = np.abs(self._dvalue / self.value) * 100
395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
396            if len(self.e_names) > 1:
397                print(' Ensemble errors:')
398            e_content = self.e_content
399            for e_name in self.mc_names:
400                gap = _determine_gap(self, e_content, e_name)
401
402                if len(self.e_names) > 1:
403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
405                tau_string += f" in units of {gap} config"
406                if gap > 1:
407                    tau_string += "s"
408                if self.tau_exp[e_name] > 0:
409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
410                else:
411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
412                print(tau_string)
413            for e_name in self.cov_names:
414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
415        if ens_content is True:
416            if len(self.e_names) == 1:
417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
418            else:
419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
420            my_string_list = []
421            for key, value in sorted(self.e_content.items()):
422                if key not in self.covobs:
423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
424                    if len(value) == 1:
425                        my_string += f': {self.shape[value[0]]} configurations'
426                        if isinstance(self.idl[value[0]], range):
427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
428                        else:
429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
430                    else:
431                        sublist = []
432                        for v in value:
433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
434                            my_substring += f': {self.shape[v]} configurations'
435                            if isinstance(self.idl[v], range):
436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
437                            else:
438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
439                            sublist.append(my_substring)
440
441                        my_string += '\n' + '\n'.join(sublist)
442                else:
443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
444                my_string_list.append(my_string)
445            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
447    def reweight(self, weight):
448        """Reweight the obs with given rewighting factors.
449
450        Parameters
451        ----------
452        weight : Obs
453            Reweighting factor. An Observable that has to be defined on a superset of the
454            configurations in obs[i].idl for all i.
455        all_configs : bool
456            if True, the reweighted observables are normalized by the average of
457            the reweighting factor on all configurations in weight.idl and not
458            on the configurations in obs[i].idl. Default False.
459        """
460        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
462    def is_zero_within_error(self, sigma=1):
463        """Checks whether the observable is zero within 'sigma' standard errors.
464
465        Parameters
466        ----------
467        sigma : int
468            Number of standard errors used for the check.
469
470        Works only properly when the gamma method was run.
471        """
472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
474    def is_zero(self, atol=1e-10):
475        """Checks whether the observable is zero within a given tolerance.
476
477        Parameters
478        ----------
479        atol : float
480            Absolute tolerance (for details see numpy documentation).
481        """
482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
484    def plot_tauint(self, save=None):
485        """Plot integrated autocorrelation time for each ensemble.
486
487        Parameters
488        ----------
489        save : str
490            saves the figure to a file named 'save' if.
491        """
492        if not hasattr(self, 'e_dvalue'):
493            raise Exception('Run the gamma method first.')
494
495        for e, e_name in enumerate(self.mc_names):
496            fig = plt.figure()
497            plt.xlabel(r'$W$')
498            plt.ylabel(r'$\tau_\mathrm{int}$')
499            length = int(len(self.e_n_tauint[e_name]))
500            if self.tau_exp[e_name] > 0:
501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
502                x_help = np.arange(2 * self.tau_exp[e_name])
503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
510            else:
511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513
514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
516            plt.legend()
517            plt.xlim(-0.5, xmax)
518            ylim = plt.ylim()
519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
520            plt.draw()
521            if save:
522                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
524    def plot_rho(self, save=None):
525        """Plot normalized autocorrelation function time for each ensemble.
526
527        Parameters
528        ----------
529        save : str
530            saves the figure to a file named 'save' if.
531        """
532        if not hasattr(self, 'e_dvalue'):
533            raise Exception('Run the gamma method first.')
534        for e, e_name in enumerate(self.mc_names):
535            fig = plt.figure()
536            plt.xlabel('W')
537            plt.ylabel('rho')
538            length = int(len(self.e_drho[e_name]))
539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
541            if self.tau_exp[e_name] > 0:
542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
546            else:
547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
550            plt.xlim(-0.5, xmax)
551            plt.draw()
552            if save:
553                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
555    def plot_rep_dist(self):
556        """Plot replica distribution for each ensemble with more than one replicum."""
557        if not hasattr(self, 'e_dvalue'):
558            raise Exception('Run the gamma method first.')
559        for e, e_name in enumerate(self.mc_names):
560            if len(self.e_content[e_name]) == 1:
561                print('No replica distribution for a single replicum (', e_name, ')')
562                continue
563            r_length = []
564            sub_r_mean = 0
565            for r, r_name in enumerate(self.e_content[e_name]):
566                r_length.append(len(self.deltas[r_name]))
567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
568            e_N = np.sum(r_length)
569            sub_r_mean /= e_N
570            arr = np.zeros(len(self.e_content[e_name]))
571            for r, r_name in enumerate(self.e_content[e_name]):
572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
575            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
577    def plot_history(self, expand=True):
578        """Plot derived Monte Carlo history for each ensemble
579
580        Parameters
581        ----------
582        expand : bool
583            show expanded history for irregular Monte Carlo chains (default: True).
584        """
585        for e, e_name in enumerate(self.mc_names):
586            plt.figure()
587            r_length = []
588            tmp = []
589            tmp_expanded = []
590            for r, r_name in enumerate(self.e_content[e_name]):
591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
592                if expand:
593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
594                    r_length.append(len(tmp_expanded[-1]))
595                else:
596                    r_length.append(len(tmp[-1]))
597            e_N = np.sum(r_length)
598            x = np.arange(e_N)
599            y_test = np.concatenate(tmp, axis=0)
600            if expand:
601                y = np.concatenate(tmp_expanded, axis=0)
602            else:
603                y = y_test
604            plt.errorbar(x, y, fmt='.', markersize=3)
605            plt.xlim(-0.5, e_N - 0.5)
606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
607            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
609    def plot_piechart(self, save=None):
610        """Plot piechart which shows the fractional contribution of each
611        ensemble to the error and returns a dictionary containing the fractions.
612
613        Parameters
614        ----------
615        save : str
616            saves the figure to a file named 'save' if.
617        """
618        if not hasattr(self, 'e_dvalue'):
619            raise Exception('Run the gamma method first.')
620        if np.isclose(0.0, self._dvalue, atol=1e-15):
621            raise Exception('Error is 0.0')
622        labels = self.e_names
623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
624        fig1, ax1 = plt.subplots()
625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
626        ax1.axis('equal')
627        plt.draw()
628        if save:
629            fig1.savefig(save)
630
631        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
634        """Dump the Obs to a file 'name' of chosen format.
635
636        Parameters
637        ----------
638        filename : str
639            name of the file to be saved.
640        datatype : str
641            Format of the exported file. Supported formats include
642            "json.gz" and "pickle"
643        description : str
644            Description for output file, only relevant for json.gz format.
645        path : str
646            specifies a custom path for the file (default '.')
647        """
648        if 'path' in kwargs:
649            file_name = kwargs.get('path') + '/' + filename
650        else:
651            file_name = filename
652
653        if datatype == "json.gz":
654            from .input.json import dump_to_json
655            dump_to_json([self], file_name, description=description)
656        elif datatype == "pickle":
657            with open(file_name + '.p', 'wb') as fb:
658                pickle.dump(self, fb)
659        else:
660            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
662    def export_jackknife(self):
663        """Export jackknife samples from the Obs
664
665        Returns
666        -------
667        numpy.ndarray
668            Returns a numpy array of length N + 1 where N is the number of samples
669            for the given ensemble and replicum. The zeroth entry of the array contains
670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
671            derived from the Obs. The current implementation only works for observables
672            defined on exactly one ensemble and replicum. The derived jackknife samples
673            should agree with samples from a full jackknife analysis up to O(1/N).
674        """
675
676        if len(self.names) != 1:
677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
678
679        name = self.names[0]
680        full_data = self.deltas[name] + self.r_values[name]
681        n = full_data.size
682        mean = self.value
683        tmp_jacks = np.zeros(n + 1)
684        tmp_jacks[0] = mean
685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
686        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
827    def sqrt(self):
828        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
830    def log(self):
831        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
833    def exp(self):
834        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
836    def sin(self):
837        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
839    def cos(self):
840        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
842    def tan(self):
843        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
845    def arcsin(self):
846        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
848    def arccos(self):
849        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
851    def arctan(self):
852        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
854    def sinh(self):
855        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
857    def cosh(self):
858        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
860    def tanh(self):
861        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
863    def arcsinh(self):
864        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
866    def arccosh(self):
867        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
869    def arctanh(self):
870        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
873class CObs:
874    """Class for a complex valued observable."""
875    __slots__ = ['_real', '_imag', 'tag']
876
877    def __init__(self, real, imag=0.0):
878        self._real = real
879        self._imag = imag
880        self.tag = None
881
882    @property
883    def real(self):
884        return self._real
885
886    @property
887    def imag(self):
888        return self._imag
889
890    def gamma_method(self, **kwargs):
891        """Executes the gamma_method for the real and the imaginary part."""
892        if isinstance(self.real, Obs):
893            self.real.gamma_method(**kwargs)
894        if isinstance(self.imag, Obs):
895            self.imag.gamma_method(**kwargs)
896
897    def is_zero(self):
898        """Checks whether both real and imaginary part are zero within machine precision."""
899        return self.real == 0.0 and self.imag == 0.0
900
901    def conjugate(self):
902        return CObs(self.real, -self.imag)
903
904    def __add__(self, other):
905        if isinstance(other, np.ndarray):
906            return other + self
907        elif hasattr(other, 'real') and hasattr(other, 'imag'):
908            return CObs(self.real + other.real,
909                        self.imag + other.imag)
910        else:
911            return CObs(self.real + other, self.imag)
912
913    def __radd__(self, y):
914        return self + y
915
916    def __sub__(self, other):
917        if isinstance(other, np.ndarray):
918            return -1 * (other - self)
919        elif hasattr(other, 'real') and hasattr(other, 'imag'):
920            return CObs(self.real - other.real, self.imag - other.imag)
921        else:
922            return CObs(self.real - other, self.imag)
923
924    def __rsub__(self, other):
925        return -1 * (self - other)
926
927    def __mul__(self, other):
928        if isinstance(other, np.ndarray):
929            return other * self
930        elif hasattr(other, 'real') and hasattr(other, 'imag'):
931            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
932                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
933                                               [self.real, other.real, self.imag, other.imag],
934                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
935                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
936                                               [self.real, other.real, self.imag, other.imag],
937                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
938            elif getattr(other, 'imag', 0) != 0:
939                return CObs(self.real * other.real - self.imag * other.imag,
940                            self.imag * other.real + self.real * other.imag)
941            else:
942                return CObs(self.real * other.real, self.imag * other.real)
943        else:
944            return CObs(self.real * other, self.imag * other)
945
946    def __rmul__(self, other):
947        return self * other
948
949    def __truediv__(self, other):
950        if isinstance(other, np.ndarray):
951            return 1 / (other / self)
952        elif hasattr(other, 'real') and hasattr(other, 'imag'):
953            r = other.real ** 2 + other.imag ** 2
954            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
955        else:
956            return CObs(self.real / other, self.imag / other)
957
958    def __rtruediv__(self, other):
959        r = self.real ** 2 + self.imag ** 2
960        if hasattr(other, 'real') and hasattr(other, 'imag'):
961            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
962        else:
963            return CObs(self.real * other / r, -self.imag * other / r)
964
965    def __abs__(self):
966        return np.sqrt(self.real**2 + self.imag**2)
967
968    def __pos__(self):
969        return self
970
971    def __neg__(self):
972        return -1 * self
973
974    def __eq__(self, other):
975        return self.real == other.real and self.imag == other.imag
976
977    def __str__(self):
978        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
979
980    def __repr__(self):
981        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
877    def __init__(self, real, imag=0.0):
878        self._real = real
879        self._imag = imag
880        self.tag = None
def gamma_method(self, **kwargs):
890    def gamma_method(self, **kwargs):
891        """Executes the gamma_method for the real and the imaginary part."""
892        if isinstance(self.real, Obs):
893            self.real.gamma_method(**kwargs)
894        if isinstance(self.imag, Obs):
895            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
897    def is_zero(self):
898        """Checks whether both real and imaginary part are zero within machine precision."""
899        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
901    def conjugate(self):
902        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
1103def derived_observable(func, data, array_mode=False, **kwargs):
1104    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1105
1106    Parameters
1107    ----------
1108    func : object
1109        arbitrary function of the form func(data, **kwargs). For the
1110        automatic differentiation to work, all numpy functions have to have
1111        the autograd wrapper (use 'import autograd.numpy as anp').
1112    data : list
1113        list of Obs, e.g. [obs1, obs2, obs3].
1114    num_grad : bool
1115        if True, numerical derivatives are used instead of autograd
1116        (default False). To control the numerical differentiation the
1117        kwargs of numdifftools.step_generators.MaxStepGenerator
1118        can be used.
1119    man_grad : list
1120        manually supply a list or an array which contains the jacobian
1121        of func. Use cautiously, supplying the wrong derivative will
1122        not be intercepted.
1123
1124    Notes
1125    -----
1126    For simple mathematical operations it can be practical to use anonymous
1127    functions. For the ratio of two observables one can e.g. use
1128
1129    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1130    """
1131
1132    data = np.asarray(data)
1133    raveled_data = data.ravel()
1134
1135    # Workaround for matrix operations containing non Obs data
1136    if not all(isinstance(x, Obs) for x in raveled_data):
1137        for i in range(len(raveled_data)):
1138            if isinstance(raveled_data[i], (int, float)):
1139                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1140
1141    allcov = {}
1142    for o in raveled_data:
1143        for name in o.cov_names:
1144            if name in allcov:
1145                if not np.allclose(allcov[name], o.covobs[name].cov):
1146                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1147            else:
1148                allcov[name] = o.covobs[name].cov
1149
1150    n_obs = len(raveled_data)
1151    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1152    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1153    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1154
1155    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1156
1157    if data.ndim == 1:
1158        values = np.array([o.value for o in data])
1159    else:
1160        values = np.vectorize(lambda x: x.value)(data)
1161
1162    new_values = func(values, **kwargs)
1163
1164    multi = int(isinstance(new_values, np.ndarray))
1165
1166    new_r_values = {}
1167    new_idl_d = {}
1168    for name in new_sample_names:
1169        idl = []
1170        tmp_values = np.zeros(n_obs)
1171        for i, item in enumerate(raveled_data):
1172            tmp_values[i] = item.r_values.get(name, item.value)
1173            tmp_idl = item.idl.get(name)
1174            if tmp_idl is not None:
1175                idl.append(tmp_idl)
1176        if multi > 0:
1177            tmp_values = np.array(tmp_values).reshape(data.shape)
1178        new_r_values[name] = func(tmp_values, **kwargs)
1179        new_idl_d[name] = _merge_idx(idl)
1180
1181    if 'man_grad' in kwargs:
1182        deriv = np.asarray(kwargs.get('man_grad'))
1183        if new_values.shape + data.shape != deriv.shape:
1184            raise Exception('Manual derivative does not have correct shape.')
1185    elif kwargs.get('num_grad') is True:
1186        if multi > 0:
1187            raise Exception('Multi mode currently not supported for numerical derivative')
1188        options = {
1189            'base_step': 0.1,
1190            'step_ratio': 2.5}
1191        for key in options.keys():
1192            kwarg = kwargs.get(key)
1193            if kwarg is not None:
1194                options[key] = kwarg
1195        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1196        if tmp_df.size == 1:
1197            deriv = np.array([tmp_df.real])
1198        else:
1199            deriv = tmp_df.real
1200    else:
1201        deriv = jacobian(func)(values, **kwargs)
1202
1203    final_result = np.zeros(new_values.shape, dtype=object)
1204
1205    if array_mode is True:
1206
1207        class _Zero_grad():
1208            def __init__(self, N):
1209                self.grad = np.zeros((N, 1))
1210
1211        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1212        d_extracted = {}
1213        g_extracted = {}
1214        for name in new_sample_names:
1215            d_extracted[name] = []
1216            ens_length = len(new_idl_d[name])
1217            for i_dat, dat in enumerate(data):
1218                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1219        for name in new_cov_names:
1220            g_extracted[name] = []
1221            zero_grad = _Zero_grad(new_covobs_lengths[name])
1222            for i_dat, dat in enumerate(data):
1223                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1224
1225    for i_val, new_val in np.ndenumerate(new_values):
1226        new_deltas = {}
1227        new_grad = {}
1228        if array_mode is True:
1229            for name in new_sample_names:
1230                ens_length = d_extracted[name][0].shape[-1]
1231                new_deltas[name] = np.zeros(ens_length)
1232                for i_dat, dat in enumerate(d_extracted[name]):
1233                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1234            for name in new_cov_names:
1235                new_grad[name] = 0
1236                for i_dat, dat in enumerate(g_extracted[name]):
1237                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1238        else:
1239            for j_obs, obs in np.ndenumerate(data):
1240                for name in obs.names:
1241                    if name in obs.cov_names:
1242                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1243                    else:
1244                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1245
1246        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1247
1248        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1249            raise Exception('The same name has been used for deltas and covobs!')
1250        new_samples = []
1251        new_means = []
1252        new_idl = []
1253        new_names_obs = []
1254        for name in new_names:
1255            if name not in new_covobs:
1256                new_samples.append(new_deltas[name])
1257                new_idl.append(new_idl_d[name])
1258                new_means.append(new_r_values[name][i_val])
1259                new_names_obs.append(name)
1260        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1261        for name in new_covobs:
1262            final_result[i_val].names.append(name)
1263        final_result[i_val]._covobs = new_covobs
1264        final_result[i_val]._value = new_val
1265        final_result[i_val].reweighted = reweighted
1266
1267    if multi == 0:
1268        final_result = final_result.item()
1269
1270    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1302def reweight(weight, obs, **kwargs):
1303    """Reweight a list of observables.
1304
1305    Parameters
1306    ----------
1307    weight : Obs
1308        Reweighting factor. An Observable that has to be defined on a superset of the
1309        configurations in obs[i].idl for all i.
1310    obs : list
1311        list of Obs, e.g. [obs1, obs2, obs3].
1312    all_configs : bool
1313        if True, the reweighted observables are normalized by the average of
1314        the reweighting factor on all configurations in weight.idl and not
1315        on the configurations in obs[i].idl. Default False.
1316    """
1317    result = []
1318    for i in range(len(obs)):
1319        if len(obs[i].cov_names):
1320            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1321        if not set(obs[i].names).issubset(weight.names):
1322            raise Exception('Error: Ensembles do not fit')
1323        for name in obs[i].names:
1324            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1325                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1326        new_samples = []
1327        w_deltas = {}
1328        for name in sorted(obs[i].names):
1329            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1330            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1331        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1332
1333        if kwargs.get('all_configs'):
1334            new_weight = weight
1335        else:
1336            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1337
1338        result.append(tmp_obs / new_weight)
1339        result[-1].reweighted = True
1340
1341    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1344def correlate(obs_a, obs_b):
1345    """Correlate two observables.
1346
1347    Parameters
1348    ----------
1349    obs_a : Obs
1350        First observable
1351    obs_b : Obs
1352        Second observable
1353
1354    Notes
1355    -----
1356    Keep in mind to only correlate primary observables which have not been reweighted
1357    yet. The reweighting has to be applied after correlating the observables.
1358    Currently only works if ensembles are identical (this is not strictly necessary).
1359    """
1360
1361    if sorted(obs_a.names) != sorted(obs_b.names):
1362        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1363    if len(obs_a.cov_names) or len(obs_b.cov_names):
1364        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1365    for name in obs_a.names:
1366        if obs_a.shape[name] != obs_b.shape[name]:
1367            raise Exception('Shapes of ensemble', name, 'do not fit')
1368        if obs_a.idl[name] != obs_b.idl[name]:
1369            raise Exception('idl of ensemble', name, 'do not fit')
1370
1371    if obs_a.reweighted is True:
1372        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1373    if obs_b.reweighted is True:
1374        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1375
1376    new_samples = []
1377    new_idl = []
1378    for name in sorted(obs_a.names):
1379        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1380        new_idl.append(obs_a.idl[name])
1381
1382    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1383    o.reweighted = obs_a.reweighted or obs_b.reweighted
1384    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1387def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1388    r'''Calculates the error covariance matrix of a set of observables.
1389
1390    WARNING: This function should be used with care, especially for observables with support on multiple
1391             ensembles with differing autocorrelations. See the notes below for details.
1392
1393    The gamma method has to be applied first to all observables.
1394
1395    Parameters
1396    ----------
1397    obs : list or numpy.ndarray
1398        List or one dimensional array of Obs
1399    visualize : bool
1400        If True plots the corresponding normalized correlation matrix (default False).
1401    correlation : bool
1402        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1403    smooth : None or int
1404        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1405        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1406        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1407        small ones.
1408
1409    Notes
1410    -----
1411    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1412    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1413    in the absence of autocorrelation.
1414    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1415    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1416    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1417    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1418    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1419    '''
1420
1421    length = len(obs)
1422
1423    max_samples = np.max([o.N for o in obs])
1424    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1425        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1426
1427    cov = np.zeros((length, length))
1428    for i in range(length):
1429        for j in range(i, length):
1430            cov[i, j] = _covariance_element(obs[i], obs[j])
1431    cov = cov + cov.T - np.diag(np.diag(cov))
1432
1433    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1434
1435    if isinstance(smooth, int):
1436        corr = _smooth_eigenvalues(corr, smooth)
1437
1438    if visualize:
1439        plt.matshow(corr, vmin=-1, vmax=1)
1440        plt.set_cmap('RdBu')
1441        plt.colorbar()
1442        plt.draw()
1443
1444    if correlation is True:
1445        return corr
1446
1447    errors = [o.dvalue for o in obs]
1448    cov = np.diag(errors) @ corr @ np.diag(errors)
1449
1450    eigenvalues = np.linalg.eigh(cov)[0]
1451    if not np.all(eigenvalues >= 0):
1452        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1453
1454    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1534def import_jackknife(jacks, name, idl=None):
1535    """Imports jackknife samples and returns an Obs
1536
1537    Parameters
1538    ----------
1539    jacks : numpy.ndarray
1540        numpy array containing the mean value as zeroth entry and
1541        the N jackknife samples as first to Nth entry.
1542    name : str
1543        name of the ensemble the samples are defined on.
1544    """
1545    length = len(jacks) - 1
1546    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1547    samples = jacks[1:] @ prj
1548    mean = np.mean(samples)
1549    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1550    new_obs._value = jacks[0]
1551    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
1554def merge_obs(list_of_obs):
1555    """Combine all observables in list_of_obs into one new observable
1556
1557    Parameters
1558    ----------
1559    list_of_obs : list
1560        list of the Obs object to be combined
1561
1562    Notes
1563    -----
1564    It is not possible to combine obs which are based on the same replicum
1565    """
1566    replist = [item for obs in list_of_obs for item in obs.names]
1567    if (len(replist) == len(set(replist))) is False:
1568        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1569    if any([len(o.cov_names) for o in list_of_obs]):
1570        raise Exception('Not possible to merge data that contains covobs!')
1571    new_dict = {}
1572    idl_dict = {}
1573    for o in list_of_obs:
1574        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1575                        for key in set(o.deltas) | set(o.r_values)})
1576        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1577
1578    names = sorted(new_dict.keys())
1579    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1580    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1581    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1584def cov_Obs(means, cov, name, grad=None):
1585    """Create an Obs based on mean(s) and a covariance matrix
1586
1587    Parameters
1588    ----------
1589    mean : list of floats or float
1590        N mean value(s) of the new Obs
1591    cov : list or array
1592        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1593    name : str
1594        identifier for the covariance matrix
1595    grad : list or array
1596        Gradient of the Covobs wrt. the means belonging to cov.
1597    """
1598
1599    def covobs_to_obs(co):
1600        """Make an Obs out of a Covobs
1601
1602        Parameters
1603        ----------
1604        co : Covobs
1605            Covobs to be embedded into the Obs
1606        """
1607        o = Obs([], [], means=[])
1608        o._value = co.value
1609        o.names.append(co.name)
1610        o._covobs[co.name] = co
1611        o._dvalue = np.sqrt(co.errsq())
1612        return o
1613
1614    ol = []
1615    if isinstance(means, (float, int)):
1616        means = [means]
1617
1618    for i in range(len(means)):
1619        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1620    if ol[0].covobs[name].N != len(means):
1621        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1622    if len(ol) == 1:
1623        return ol[0]
1624    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.