pyerrors.obs

   1import warnings
   2import pickle
   3from math import gcd
   4from functools import reduce
   5import numpy as np
   6import autograd.numpy as anp  # Thinly-wrapped numpy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14
  15class Obs:
  16    """Class for a general observable.
  17
  18    Instances of Obs are the basic objects of a pyerrors error analysis.
  19    They are initialized with a list which contains arrays of samples for
  20    different ensembles/replica and another list of same length which contains
  21    the names of the ensembles/replica. Mathematical operations can be
  22    performed on instances. The result is another instance of Obs. The error of
  23    an instance can be computed with the gamma_method. Also contains additional
  24    methods for output and visualization of the error calculation.
  25
  26    Attributes
  27    ----------
  28    S_global : float
  29        Standard value for S (default 2.0)
  30    S_dict : dict
  31        Dictionary for S values. If an entry for a given ensemble
  32        exists this overwrites the standard value for that ensemble.
  33    tau_exp_global : float
  34        Standard value for tau_exp (default 0.0)
  35    tau_exp_dict : dict
  36        Dictionary for tau_exp values. If an entry for a given ensemble exists
  37        this overwrites the standard value for that ensemble.
  38    N_sigma_global : float
  39        Standard value for N_sigma (default 1.0)
  40    N_sigma_dict : dict
  41        Dictionary for N_sigma values. If an entry for a given ensemble exists
  42        this overwrites the standard value for that ensemble.
  43    """
  44    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  45                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  46                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  47                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  48                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  49
  50    S_global = 2.0
  51    S_dict = {}
  52    tau_exp_global = 0.0
  53    tau_exp_dict = {}
  54    N_sigma_global = 1.0
  55    N_sigma_dict = {}
  56    filter_eps = 1e-10
  57
  58    def __init__(self, samples, names, idl=None, **kwargs):
  59        """ Initialize Obs object.
  60
  61        Parameters
  62        ----------
  63        samples : list
  64            list of numpy arrays containing the Monte Carlo samples
  65        names : list
  66            list of strings labeling the individual samples
  67        idl : list, optional
  68            list of ranges or lists on which the samples are defined
  69        """
  70
  71        if kwargs.get("means") is None and len(samples):
  72            if len(samples) != len(names):
  73                raise Exception('Length of samples and names incompatible.')
  74            if idl is not None:
  75                if len(idl) != len(names):
  76                    raise Exception('Length of idl incompatible with samples and names.')
  77            name_length = len(names)
  78            if name_length > 1:
  79                if name_length != len(set(names)):
  80                    raise Exception('names are not unique.')
  81                if not all(isinstance(x, str) for x in names):
  82                    raise TypeError('All names have to be strings.')
  83            else:
  84                if not isinstance(names[0], str):
  85                    raise TypeError('All names have to be strings.')
  86            if min(len(x) for x in samples) <= 4:
  87                raise Exception('Samples have to have at least 5 entries.')
  88
  89        self.names = sorted(names)
  90        self.shape = {}
  91        self.r_values = {}
  92        self.deltas = {}
  93        self._covobs = {}
  94
  95        self._value = 0
  96        self.N = 0
  97        self.is_merged = {}
  98        self.idl = {}
  99        if idl is not None:
 100            for name, idx in sorted(zip(names, idl)):
 101                if isinstance(idx, range):
 102                    self.idl[name] = idx
 103                elif isinstance(idx, (list, np.ndarray)):
 104                    dc = np.unique(np.diff(idx))
 105                    if np.any(dc < 0):
 106                        raise Exception("Unsorted idx for idl[%s]" % (name))
 107                    if len(dc) == 1:
 108                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 109                    else:
 110                        self.idl[name] = list(idx)
 111                else:
 112                    raise Exception('incompatible type for idl[%s].' % (name))
 113        else:
 114            for name, sample in sorted(zip(names, samples)):
 115                self.idl[name] = range(1, len(sample) + 1)
 116
 117        if kwargs.get("means") is not None:
 118            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 119                self.shape[name] = len(self.idl[name])
 120                self.N += self.shape[name]
 121                self.r_values[name] = mean
 122                self.deltas[name] = sample
 123        else:
 124            for name, sample in sorted(zip(names, samples)):
 125                self.shape[name] = len(self.idl[name])
 126                self.N += self.shape[name]
 127                if len(sample) != self.shape[name]:
 128                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 129                self.r_values[name] = np.mean(sample)
 130                self.deltas[name] = sample - self.r_values[name]
 131                self._value += self.shape[name] * self.r_values[name]
 132            self._value /= self.N
 133
 134        self._dvalue = 0.0
 135        self.ddvalue = 0.0
 136        self.reweighted = False
 137
 138        self.tag = None
 139
 140    @property
 141    def value(self):
 142        return self._value
 143
 144    @property
 145    def dvalue(self):
 146        return self._dvalue
 147
 148    @property
 149    def e_names(self):
 150        return sorted(set([o.split('|')[0] for o in self.names]))
 151
 152    @property
 153    def cov_names(self):
 154        return sorted(set([o for o in self.covobs.keys()]))
 155
 156    @property
 157    def mc_names(self):
 158        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 159
 160    @property
 161    def e_content(self):
 162        res = {}
 163        for e, e_name in enumerate(self.e_names):
 164            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 165            if e_name in self.names:
 166                res[e_name].append(e_name)
 167        return res
 168
 169    @property
 170    def covobs(self):
 171        return self._covobs
 172
 173    def gamma_method(self, **kwargs):
 174        """Estimate the error and related properties of the Obs.
 175
 176        Parameters
 177        ----------
 178        S : float
 179            specifies a custom value for the parameter S (default 2.0).
 180            If set to 0 it is assumed that the data exhibits no
 181            autocorrelation. In this case the error estimates coincides
 182            with the sample standard error.
 183        tau_exp : float
 184            positive value triggers the critical slowing down analysis
 185            (default 0.0).
 186        N_sigma : float
 187            number of standard deviations from zero until the tail is
 188            attached to the autocorrelation function (default 1).
 189        fft : bool
 190            determines whether the fft algorithm is used for the computation
 191            of the autocorrelation function (default True)
 192        """
 193
 194        e_content = self.e_content
 195        self.e_dvalue = {}
 196        self.e_ddvalue = {}
 197        self.e_tauint = {}
 198        self.e_dtauint = {}
 199        self.e_windowsize = {}
 200        self.e_n_tauint = {}
 201        self.e_n_dtauint = {}
 202        e_gamma = {}
 203        self.e_rho = {}
 204        self.e_drho = {}
 205        self._dvalue = 0
 206        self.ddvalue = 0
 207
 208        self.S = {}
 209        self.tau_exp = {}
 210        self.N_sigma = {}
 211
 212        if kwargs.get('fft') is False:
 213            fft = False
 214        else:
 215            fft = True
 216
 217        def _parse_kwarg(kwarg_name):
 218            if kwarg_name in kwargs:
 219                tmp = kwargs.get(kwarg_name)
 220                if isinstance(tmp, (int, float)):
 221                    if tmp < 0:
 222                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 223                    for e, e_name in enumerate(self.e_names):
 224                        getattr(self, kwarg_name)[e_name] = tmp
 225                else:
 226                    raise TypeError(kwarg_name + ' is not in proper format.')
 227            else:
 228                for e, e_name in enumerate(self.e_names):
 229                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 230                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 231                    else:
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 233
 234        _parse_kwarg('S')
 235        _parse_kwarg('tau_exp')
 236        _parse_kwarg('N_sigma')
 237
 238        for e, e_name in enumerate(self.mc_names):
 239            r_length = []
 240            for r_name in e_content[e_name]:
 241                if isinstance(self.idl[r_name], range):
 242                    r_length.append(len(self.idl[r_name]))
 243                else:
 244                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 245
 246            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 247            w_max = max(r_length) // 2
 248            e_gamma[e_name] = np.zeros(w_max)
 249            self.e_rho[e_name] = np.zeros(w_max)
 250            self.e_drho[e_name] = np.zeros(w_max)
 251
 252            for r_name in e_content[e_name]:
 253                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 254
 255            gamma_div = np.zeros(w_max)
 256            for r_name in e_content[e_name]:
 257                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 258            gamma_div[gamma_div < 1] = 1.0
 259            e_gamma[e_name] /= gamma_div[:w_max]
 260
 261            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 262                self.e_tauint[e_name] = 0.5
 263                self.e_dtauint[e_name] = 0.0
 264                self.e_dvalue[e_name] = 0.0
 265                self.e_ddvalue[e_name] = 0.0
 266                self.e_windowsize[e_name] = 0
 267                continue
 268
 269            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 270            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 271            # Make sure no entry of tauint is smaller than 0.5
 272            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 273            # hep-lat/0306017 eq. (42)
 274            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 275            self.e_n_dtauint[e_name][0] = 0.0
 276
 277            def _compute_drho(i):
 278                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 279                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 280
 281            _compute_drho(1)
 282            if self.tau_exp[e_name] > 0:
 283                texp = self.tau_exp[e_name]
 284                # Critical slowing down analysis
 285                if w_max // 2 <= 1:
 286                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 287                for n in range(1, w_max // 2):
 288                    _compute_drho(n + 1)
 289                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 290                        # Bias correction hep-lat/0306017 eq. (49) included
 291                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 292                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 293                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 294                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 295                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 296                        self.e_windowsize[e_name] = n
 297                        break
 298            else:
 299                if self.S[e_name] == 0.0:
 300                    self.e_tauint[e_name] = 0.5
 301                    self.e_dtauint[e_name] = 0.0
 302                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 303                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 304                    self.e_windowsize[e_name] = 0
 305                else:
 306                    # Standard automatic windowing procedure
 307                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 308                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
 309                    for n in range(1, w_max):
 310                        if n < w_max // 2 - 2:
 311                            _compute_drho(n + 1)
 312                        if g_w[n - 1] < 0 or n >= w_max - 1:
 313                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 314                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 315                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 316                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 317                            self.e_windowsize[e_name] = n
 318                            break
 319
 320            self._dvalue += self.e_dvalue[e_name] ** 2
 321            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 322
 323        for e_name in self.cov_names:
 324            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 325            self.e_ddvalue[e_name] = 0
 326            self._dvalue += self.e_dvalue[e_name]**2
 327
 328        self._dvalue = np.sqrt(self._dvalue)
 329        if self._dvalue == 0.0:
 330            self.ddvalue = 0.0
 331        else:
 332            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 333        return
 334
 335    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 336        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 337           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 338
 339        Parameters
 340        ----------
 341        deltas : list
 342            List of fluctuations
 343        idx : list
 344            List or range of configurations on which the deltas are defined.
 345        shape : int
 346            Number of configurations in idx.
 347        w_max : int
 348            Upper bound for the summation window.
 349        fft : bool
 350            determines whether the fft algorithm is used for the computation
 351            of the autocorrelation function.
 352        """
 353        gamma = np.zeros(w_max)
 354        deltas = _expand_deltas(deltas, idx, shape)
 355        new_shape = len(deltas)
 356        if fft:
 357            max_gamma = min(new_shape, w_max)
 358            # The padding for the fft has to be even
 359            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 360            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 361        else:
 362            for n in range(w_max):
 363                if new_shape - n >= 0:
 364                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 365
 366        return gamma
 367
 368    def details(self, ens_content=True):
 369        """Output detailed properties of the Obs.
 370
 371        Parameters
 372        ----------
 373        ens_content : bool
 374            print details about the ensembles and replica if true.
 375        """
 376        if self.tag is not None:
 377            print("Description:", self.tag)
 378        if not hasattr(self, 'e_dvalue'):
 379            print('Result\t %3.8e' % (self.value))
 380        else:
 381            if self.value == 0.0:
 382                percentage = np.nan
 383            else:
 384                percentage = np.abs(self._dvalue / self.value) * 100
 385            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 386            if len(self.e_names) > 1:
 387                print(' Ensemble errors:')
 388            for e_name in self.mc_names:
 389                if len(self.e_names) > 1:
 390                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 391                if self.tau_exp[e_name] > 0:
 392                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
 393                else:
 394                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
 395            for e_name in self.cov_names:
 396                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 397        if ens_content is True:
 398            if len(self.e_names) == 1:
 399                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 400            else:
 401                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 402            my_string_list = []
 403            for key, value in sorted(self.e_content.items()):
 404                if key not in self.covobs:
 405                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 406                    if len(value) == 1:
 407                        my_string += f': {self.shape[value[0]]} configurations'
 408                        if isinstance(self.idl[value[0]], range):
 409                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 410                        else:
 411                            my_string += ' (irregular range)'
 412                    else:
 413                        sublist = []
 414                        for v in value:
 415                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 416                            my_substring += f': {self.shape[v]} configurations'
 417                            if isinstance(self.idl[v], range):
 418                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 419                            else:
 420                                my_substring += ' (irregular range)'
 421                            sublist.append(my_substring)
 422
 423                        my_string += '\n' + '\n'.join(sublist)
 424                else:
 425                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 426                my_string_list.append(my_string)
 427            print('\n'.join(my_string_list))
 428
 429    def reweight(self, weight):
 430        """Reweight the obs with given rewighting factors.
 431
 432        Parameters
 433        ----------
 434        weight : Obs
 435            Reweighting factor. An Observable that has to be defined on a superset of the
 436            configurations in obs[i].idl for all i.
 437        all_configs : bool
 438            if True, the reweighted observables are normalized by the average of
 439            the reweighting factor on all configurations in weight.idl and not
 440            on the configurations in obs[i].idl. Default False.
 441        """
 442        return reweight(weight, [self])[0]
 443
 444    def is_zero_within_error(self, sigma=1):
 445        """Checks whether the observable is zero within 'sigma' standard errors.
 446
 447        Parameters
 448        ----------
 449        sigma : int
 450            Number of standard errors used for the check.
 451
 452        Works only properly when the gamma method was run.
 453        """
 454        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 455
 456    def is_zero(self, atol=1e-10):
 457        """Checks whether the observable is zero within a given tolerance.
 458
 459        Parameters
 460        ----------
 461        atol : float
 462            Absolute tolerance (for details see numpy documentation).
 463        """
 464        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 465
 466    def plot_tauint(self, save=None):
 467        """Plot integrated autocorrelation time for each ensemble.
 468
 469        Parameters
 470        ----------
 471        save : str
 472            saves the figure to a file named 'save' if.
 473        """
 474        if not hasattr(self, 'e_dvalue'):
 475            raise Exception('Run the gamma method first.')
 476
 477        for e, e_name in enumerate(self.mc_names):
 478            fig = plt.figure()
 479            plt.xlabel(r'$W$')
 480            plt.ylabel(r'$\tau_\mathrm{int}$')
 481            length = int(len(self.e_n_tauint[e_name]))
 482            if self.tau_exp[e_name] > 0:
 483                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 484                x_help = np.arange(2 * self.tau_exp[e_name])
 485                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 486                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 487                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 488                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 489                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 490                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 491                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 492            else:
 493                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 494                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 495
 496            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 497            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 498            plt.legend()
 499            plt.xlim(-0.5, xmax)
 500            ylim = plt.ylim()
 501            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 502            plt.draw()
 503            if save:
 504                fig.savefig(save + "_" + str(e))
 505
 506    def plot_rho(self, save=None):
 507        """Plot normalized autocorrelation function time for each ensemble.
 508
 509        Parameters
 510        ----------
 511        save : str
 512            saves the figure to a file named 'save' if.
 513        """
 514        if not hasattr(self, 'e_dvalue'):
 515            raise Exception('Run the gamma method first.')
 516        for e, e_name in enumerate(self.mc_names):
 517            fig = plt.figure()
 518            plt.xlabel('W')
 519            plt.ylabel('rho')
 520            length = int(len(self.e_drho[e_name]))
 521            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 522            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 523            if self.tau_exp[e_name] > 0:
 524                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 525                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 526                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 527                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 528            else:
 529                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 530                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 531            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 532            plt.xlim(-0.5, xmax)
 533            plt.draw()
 534            if save:
 535                fig.savefig(save + "_" + str(e))
 536
 537    def plot_rep_dist(self):
 538        """Plot replica distribution for each ensemble with more than one replicum."""
 539        if not hasattr(self, 'e_dvalue'):
 540            raise Exception('Run the gamma method first.')
 541        for e, e_name in enumerate(self.mc_names):
 542            if len(self.e_content[e_name]) == 1:
 543                print('No replica distribution for a single replicum (', e_name, ')')
 544                continue
 545            r_length = []
 546            sub_r_mean = 0
 547            for r, r_name in enumerate(self.e_content[e_name]):
 548                r_length.append(len(self.deltas[r_name]))
 549                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 550            e_N = np.sum(r_length)
 551            sub_r_mean /= e_N
 552            arr = np.zeros(len(self.e_content[e_name]))
 553            for r, r_name in enumerate(self.e_content[e_name]):
 554                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 555            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 556            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 557            plt.draw()
 558
 559    def plot_history(self, expand=True):
 560        """Plot derived Monte Carlo history for each ensemble
 561
 562        Parameters
 563        ----------
 564        expand : bool
 565            show expanded history for irregular Monte Carlo chains (default: True).
 566        """
 567        for e, e_name in enumerate(self.mc_names):
 568            plt.figure()
 569            r_length = []
 570            tmp = []
 571            tmp_expanded = []
 572            for r, r_name in enumerate(self.e_content[e_name]):
 573                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 574                if expand:
 575                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 576                    r_length.append(len(tmp_expanded[-1]))
 577                else:
 578                    r_length.append(len(tmp[-1]))
 579            e_N = np.sum(r_length)
 580            x = np.arange(e_N)
 581            y_test = np.concatenate(tmp, axis=0)
 582            if expand:
 583                y = np.concatenate(tmp_expanded, axis=0)
 584            else:
 585                y = y_test
 586            plt.errorbar(x, y, fmt='.', markersize=3)
 587            plt.xlim(-0.5, e_N - 0.5)
 588            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 589            plt.draw()
 590
 591    def plot_piechart(self, save=None):
 592        """Plot piechart which shows the fractional contribution of each
 593        ensemble to the error and returns a dictionary containing the fractions.
 594
 595        Parameters
 596        ----------
 597        save : str
 598            saves the figure to a file named 'save' if.
 599        """
 600        if not hasattr(self, 'e_dvalue'):
 601            raise Exception('Run the gamma method first.')
 602        if np.isclose(0.0, self._dvalue, atol=1e-15):
 603            raise Exception('Error is 0.0')
 604        labels = self.e_names
 605        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 606        fig1, ax1 = plt.subplots()
 607        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 608        ax1.axis('equal')
 609        plt.draw()
 610        if save:
 611            fig1.savefig(save)
 612
 613        return dict(zip(self.e_names, sizes))
 614
 615    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 616        """Dump the Obs to a file 'name' of chosen format.
 617
 618        Parameters
 619        ----------
 620        filename : str
 621            name of the file to be saved.
 622        datatype : str
 623            Format of the exported file. Supported formats include
 624            "json.gz" and "pickle"
 625        description : str
 626            Description for output file, only relevant for json.gz format.
 627        path : str
 628            specifies a custom path for the file (default '.')
 629        """
 630        if 'path' in kwargs:
 631            file_name = kwargs.get('path') + '/' + filename
 632        else:
 633            file_name = filename
 634
 635        if datatype == "json.gz":
 636            from .input.json import dump_to_json
 637            dump_to_json([self], file_name, description=description)
 638        elif datatype == "pickle":
 639            with open(file_name + '.p', 'wb') as fb:
 640                pickle.dump(self, fb)
 641        else:
 642            raise Exception("Unknown datatype " + str(datatype))
 643
 644    def export_jackknife(self):
 645        """Export jackknife samples from the Obs
 646
 647        Returns
 648        -------
 649        numpy.ndarray
 650            Returns a numpy array of length N + 1 where N is the number of samples
 651            for the given ensemble and replicum. The zeroth entry of the array contains
 652            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 653            derived from the Obs. The current implementation only works for observables
 654            defined on exactly one ensemble and replicum. The derived jackknife samples
 655            should agree with samples from a full jackknife analysis up to O(1/N).
 656        """
 657
 658        if len(self.names) != 1:
 659            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 660
 661        name = self.names[0]
 662        full_data = self.deltas[name] + self.r_values[name]
 663        n = full_data.size
 664        mean = self.value
 665        tmp_jacks = np.zeros(n + 1)
 666        tmp_jacks[0] = mean
 667        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 668        return tmp_jacks
 669
 670    def __float__(self):
 671        return float(self.value)
 672
 673    def __repr__(self):
 674        return 'Obs[' + str(self) + ']'
 675
 676    def __str__(self):
 677        if self._dvalue == 0.0:
 678            return str(self.value)
 679        fexp = np.floor(np.log10(self._dvalue))
 680        if fexp < 0.0:
 681            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 682        elif fexp == 0.0:
 683            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
 684        else:
 685            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 686
 687    # Overload comparisons
 688    def __lt__(self, other):
 689        return self.value < other
 690
 691    def __le__(self, other):
 692        return self.value <= other
 693
 694    def __gt__(self, other):
 695        return self.value > other
 696
 697    def __ge__(self, other):
 698        return self.value >= other
 699
 700    def __eq__(self, other):
 701        return (self - other).is_zero()
 702
 703    def __ne__(self, other):
 704        return not (self - other).is_zero()
 705
 706    # Overload math operations
 707    def __add__(self, y):
 708        if isinstance(y, Obs):
 709            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 710        else:
 711            if isinstance(y, np.ndarray):
 712                return np.array([self + o for o in y])
 713            elif y.__class__.__name__ in ['Corr', 'CObs']:
 714                return NotImplemented
 715            else:
 716                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 717
 718    def __radd__(self, y):
 719        return self + y
 720
 721    def __mul__(self, y):
 722        if isinstance(y, Obs):
 723            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 724        else:
 725            if isinstance(y, np.ndarray):
 726                return np.array([self * o for o in y])
 727            elif isinstance(y, complex):
 728                return CObs(self * y.real, self * y.imag)
 729            elif y.__class__.__name__ in ['Corr', 'CObs']:
 730                return NotImplemented
 731            else:
 732                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 733
 734    def __rmul__(self, y):
 735        return self * y
 736
 737    def __sub__(self, y):
 738        if isinstance(y, Obs):
 739            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 740        else:
 741            if isinstance(y, np.ndarray):
 742                return np.array([self - o for o in y])
 743            elif y.__class__.__name__ in ['Corr', 'CObs']:
 744                return NotImplemented
 745            else:
 746                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 747
 748    def __rsub__(self, y):
 749        return -1 * (self - y)
 750
 751    def __pos__(self):
 752        return self
 753
 754    def __neg__(self):
 755        return -1 * self
 756
 757    def __truediv__(self, y):
 758        if isinstance(y, Obs):
 759            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 760        else:
 761            if isinstance(y, np.ndarray):
 762                return np.array([self / o for o in y])
 763            elif y.__class__.__name__ in ['Corr', 'CObs']:
 764                return NotImplemented
 765            else:
 766                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 767
 768    def __rtruediv__(self, y):
 769        if isinstance(y, Obs):
 770            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 771        else:
 772            if isinstance(y, np.ndarray):
 773                return np.array([o / self for o in y])
 774            elif y.__class__.__name__ in ['Corr', 'CObs']:
 775                return NotImplemented
 776            else:
 777                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 778
 779    def __pow__(self, y):
 780        if isinstance(y, Obs):
 781            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 782        else:
 783            return derived_observable(lambda x: x[0] ** y, [self])
 784
 785    def __rpow__(self, y):
 786        if isinstance(y, Obs):
 787            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 788        else:
 789            return derived_observable(lambda x: y ** x[0], [self])
 790
 791    def __abs__(self):
 792        return derived_observable(lambda x: anp.abs(x[0]), [self])
 793
 794    # Overload numpy functions
 795    def sqrt(self):
 796        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 797
 798    def log(self):
 799        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 800
 801    def exp(self):
 802        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 803
 804    def sin(self):
 805        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 806
 807    def cos(self):
 808        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 809
 810    def tan(self):
 811        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 812
 813    def arcsin(self):
 814        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 815
 816    def arccos(self):
 817        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 818
 819    def arctan(self):
 820        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 821
 822    def sinh(self):
 823        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 824
 825    def cosh(self):
 826        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 827
 828    def tanh(self):
 829        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 830
 831    def arcsinh(self):
 832        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 833
 834    def arccosh(self):
 835        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 836
 837    def arctanh(self):
 838        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 839
 840
 841class CObs:
 842    """Class for a complex valued observable."""
 843    __slots__ = ['_real', '_imag', 'tag']
 844
 845    def __init__(self, real, imag=0.0):
 846        self._real = real
 847        self._imag = imag
 848        self.tag = None
 849
 850    @property
 851    def real(self):
 852        return self._real
 853
 854    @property
 855    def imag(self):
 856        return self._imag
 857
 858    def gamma_method(self, **kwargs):
 859        """Executes the gamma_method for the real and the imaginary part."""
 860        if isinstance(self.real, Obs):
 861            self.real.gamma_method(**kwargs)
 862        if isinstance(self.imag, Obs):
 863            self.imag.gamma_method(**kwargs)
 864
 865    def is_zero(self):
 866        """Checks whether both real and imaginary part are zero within machine precision."""
 867        return self.real == 0.0 and self.imag == 0.0
 868
 869    def conjugate(self):
 870        return CObs(self.real, -self.imag)
 871
 872    def __add__(self, other):
 873        if isinstance(other, np.ndarray):
 874            return other + self
 875        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 876            return CObs(self.real + other.real,
 877                        self.imag + other.imag)
 878        else:
 879            return CObs(self.real + other, self.imag)
 880
 881    def __radd__(self, y):
 882        return self + y
 883
 884    def __sub__(self, other):
 885        if isinstance(other, np.ndarray):
 886            return -1 * (other - self)
 887        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 888            return CObs(self.real - other.real, self.imag - other.imag)
 889        else:
 890            return CObs(self.real - other, self.imag)
 891
 892    def __rsub__(self, other):
 893        return -1 * (self - other)
 894
 895    def __mul__(self, other):
 896        if isinstance(other, np.ndarray):
 897            return other * self
 898        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 899            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 900                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 901                                               [self.real, other.real, self.imag, other.imag],
 902                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 903                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 904                                               [self.real, other.real, self.imag, other.imag],
 905                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 906            elif getattr(other, 'imag', 0) != 0:
 907                return CObs(self.real * other.real - self.imag * other.imag,
 908                            self.imag * other.real + self.real * other.imag)
 909            else:
 910                return CObs(self.real * other.real, self.imag * other.real)
 911        else:
 912            return CObs(self.real * other, self.imag * other)
 913
 914    def __rmul__(self, other):
 915        return self * other
 916
 917    def __truediv__(self, other):
 918        if isinstance(other, np.ndarray):
 919            return 1 / (other / self)
 920        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 921            r = other.real ** 2 + other.imag ** 2
 922            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 923        else:
 924            return CObs(self.real / other, self.imag / other)
 925
 926    def __rtruediv__(self, other):
 927        r = self.real ** 2 + self.imag ** 2
 928        if hasattr(other, 'real') and hasattr(other, 'imag'):
 929            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 930        else:
 931            return CObs(self.real * other / r, -self.imag * other / r)
 932
 933    def __abs__(self):
 934        return np.sqrt(self.real**2 + self.imag**2)
 935
 936    def __pos__(self):
 937        return self
 938
 939    def __neg__(self):
 940        return -1 * self
 941
 942    def __eq__(self, other):
 943        return self.real == other.real and self.imag == other.imag
 944
 945    def __str__(self):
 946        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 947
 948    def __repr__(self):
 949        return 'CObs[' + str(self) + ']'
 950
 951
 952def _expand_deltas(deltas, idx, shape):
 953    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 954       If idx is of type range, the deltas are not changed
 955
 956    Parameters
 957    ----------
 958    deltas : list
 959        List of fluctuations
 960    idx : list
 961        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 962    shape : int
 963        Number of configs in idx.
 964    """
 965    if isinstance(idx, range):
 966        return deltas
 967    else:
 968        ret = np.zeros(idx[-1] - idx[0] + 1)
 969        for i in range(shape):
 970            ret[idx[i] - idx[0]] = deltas[i]
 971        return ret
 972
 973
 974def _merge_idx(idl):
 975    """Returns the union of all lists in idl as sorted list
 976
 977    Parameters
 978    ----------
 979    idl : list
 980        List of lists or ranges.
 981    """
 982
 983    # Use groupby to efficiently check whether all elements of idl are identical
 984    try:
 985        g = groupby(idl)
 986        if next(g, True) and not next(g, False):
 987            return idl[0]
 988    except Exception:
 989        pass
 990
 991    if np.all([type(idx) is range for idx in idl]):
 992        if len(set([idx[0] for idx in idl])) == 1:
 993            idstart = min([idx.start for idx in idl])
 994            idstop = max([idx.stop for idx in idl])
 995            idstep = min([idx.step for idx in idl])
 996            return range(idstart, idstop, idstep)
 997
 998    return sorted(set().union(*idl))
 999
1000
1001def _intersection_idx(idl):
1002    """Returns the intersection of all lists in idl as sorted list
1003
1004    Parameters
1005    ----------
1006    idl : list
1007        List of lists or ranges.
1008    """
1009
1010    def _lcm(*args):
1011        """Returns the lowest common multiple of args.
1012
1013        From python 3.9 onwards the math library contains an lcm function."""
1014        return reduce(lambda a, b: a * b // gcd(a, b), args)
1015
1016    # Use groupby to efficiently check whether all elements of idl are identical
1017    try:
1018        g = groupby(idl)
1019        if next(g, True) and not next(g, False):
1020            return idl[0]
1021    except Exception:
1022        pass
1023
1024    if np.all([type(idx) is range for idx in idl]):
1025        if len(set([idx[0] for idx in idl])) == 1:
1026            idstart = max([idx.start for idx in idl])
1027            idstop = min([idx.stop for idx in idl])
1028            idstep = _lcm(*[idx.step for idx in idl])
1029            return range(idstart, idstop, idstep)
1030
1031    return sorted(set.intersection(*[set(o) for o in idl]))
1032
1033
1034def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1035    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1036       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1037       common divisor of the step sizes is used as new step size.
1038
1039    Parameters
1040    ----------
1041    deltas : list
1042        List of fluctuations
1043    idx : list
1044        List or range of configs on which the deltas are defined.
1045        Has to be a subset of new_idx and has to be sorted in ascending order.
1046    shape : list
1047        Number of configs in idx.
1048    new_idx : list
1049        List of configs that defines the new range, has to be sorted in ascending order.
1050    """
1051
1052    if type(idx) is range and type(new_idx) is range:
1053        if idx == new_idx:
1054            return deltas
1055    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1056    for i in range(shape):
1057        ret[idx[i] - new_idx[0]] = deltas[i]
1058    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1059
1060
1061def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
1062    """Collapse deltas defined on idx to the list of configs that is defined by new_idx.
1063       If idx and new_idx are of type range, the smallest
1064       common divisor of the step sizes is used as new step size.
1065
1066    Parameters
1067    ----------
1068    deltas : list
1069        List of fluctuations
1070    idx : list
1071        List or range of configs on which the deltas are defined.
1072        Has to be a subset of new_idx and has to be sorted in ascending order.
1073    shape : list
1074        Number of configs in idx.
1075    new_idx : list
1076        List of configs that defines the new range, has to be sorted in ascending order.
1077    """
1078
1079    if type(idx) is range and type(new_idx) is range:
1080        if idx == new_idx:
1081            return deltas
1082    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1083    for i in range(shape):
1084        if idx[i] in new_idx:
1085            ret[idx[i] - new_idx[0]] = deltas[i]
1086    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1087
1088
1089def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1090    """Filter out all configurations with vanishing fluctuation such that they do not
1091       contribute to the error estimate anymore. Returns the new deltas and
1092       idx according to the filtering.
1093       A fluctuation is considered to be vanishing, if it is smaller than eps times
1094       the mean of the absolute values of all deltas in one list.
1095
1096    Parameters
1097    ----------
1098    deltas : list
1099        List of fluctuations
1100    idx : list
1101        List or ranges of configs on which the deltas are defined.
1102    eps : float
1103        Prefactor that enters the filter criterion.
1104    """
1105    new_deltas = []
1106    new_idx = []
1107    maxd = np.mean(np.fabs(deltas))
1108    for i in range(len(deltas)):
1109        if abs(deltas[i]) > eps * maxd:
1110            new_deltas.append(deltas[i])
1111            new_idx.append(idx[i])
1112    if new_idx:
1113        return np.array(new_deltas), new_idx
1114    else:
1115        return deltas, idx
1116
1117
1118def derived_observable(func, data, array_mode=False, **kwargs):
1119    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1120
1121    Parameters
1122    ----------
1123    func : object
1124        arbitrary function of the form func(data, **kwargs). For the
1125        automatic differentiation to work, all numpy functions have to have
1126        the autograd wrapper (use 'import autograd.numpy as anp').
1127    data : list
1128        list of Obs, e.g. [obs1, obs2, obs3].
1129    num_grad : bool
1130        if True, numerical derivatives are used instead of autograd
1131        (default False). To control the numerical differentiation the
1132        kwargs of numdifftools.step_generators.MaxStepGenerator
1133        can be used.
1134    man_grad : list
1135        manually supply a list or an array which contains the jacobian
1136        of func. Use cautiously, supplying the wrong derivative will
1137        not be intercepted.
1138
1139    Notes
1140    -----
1141    For simple mathematical operations it can be practical to use anonymous
1142    functions. For the ratio of two observables one can e.g. use
1143
1144    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1145    """
1146
1147    data = np.asarray(data)
1148    raveled_data = data.ravel()
1149
1150    # Workaround for matrix operations containing non Obs data
1151    if not all(isinstance(x, Obs) for x in raveled_data):
1152        for i in range(len(raveled_data)):
1153            if isinstance(raveled_data[i], (int, float)):
1154                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1155
1156    allcov = {}
1157    for o in raveled_data:
1158        for name in o.cov_names:
1159            if name in allcov:
1160                if not np.allclose(allcov[name], o.covobs[name].cov):
1161                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1162            else:
1163                allcov[name] = o.covobs[name].cov
1164
1165    n_obs = len(raveled_data)
1166    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1167    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1168    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1169
1170    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1171    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1172
1173    if data.ndim == 1:
1174        values = np.array([o.value for o in data])
1175    else:
1176        values = np.vectorize(lambda x: x.value)(data)
1177
1178    new_values = func(values, **kwargs)
1179
1180    multi = int(isinstance(new_values, np.ndarray))
1181
1182    new_r_values = {}
1183    new_idl_d = {}
1184    for name in new_sample_names:
1185        idl = []
1186        tmp_values = np.zeros(n_obs)
1187        for i, item in enumerate(raveled_data):
1188            tmp_values[i] = item.r_values.get(name, item.value)
1189            tmp_idl = item.idl.get(name)
1190            if tmp_idl is not None:
1191                idl.append(tmp_idl)
1192        if multi > 0:
1193            tmp_values = np.array(tmp_values).reshape(data.shape)
1194        new_r_values[name] = func(tmp_values, **kwargs)
1195        new_idl_d[name] = _merge_idx(idl)
1196        if not is_merged[name]:
1197            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1198
1199    if 'man_grad' in kwargs:
1200        deriv = np.asarray(kwargs.get('man_grad'))
1201        if new_values.shape + data.shape != deriv.shape:
1202            raise Exception('Manual derivative does not have correct shape.')
1203    elif kwargs.get('num_grad') is True:
1204        if multi > 0:
1205            raise Exception('Multi mode currently not supported for numerical derivative')
1206        options = {
1207            'base_step': 0.1,
1208            'step_ratio': 2.5}
1209        for key in options.keys():
1210            kwarg = kwargs.get(key)
1211            if kwarg is not None:
1212                options[key] = kwarg
1213        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1214        if tmp_df.size == 1:
1215            deriv = np.array([tmp_df.real])
1216        else:
1217            deriv = tmp_df.real
1218    else:
1219        deriv = jacobian(func)(values, **kwargs)
1220
1221    final_result = np.zeros(new_values.shape, dtype=object)
1222
1223    if array_mode is True:
1224
1225        class _Zero_grad():
1226            def __init__(self, N):
1227                self.grad = np.zeros((N, 1))
1228
1229        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1230        d_extracted = {}
1231        g_extracted = {}
1232        for name in new_sample_names:
1233            d_extracted[name] = []
1234            ens_length = len(new_idl_d[name])
1235            for i_dat, dat in enumerate(data):
1236                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1237        for name in new_cov_names:
1238            g_extracted[name] = []
1239            zero_grad = _Zero_grad(new_covobs_lengths[name])
1240            for i_dat, dat in enumerate(data):
1241                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1242
1243    for i_val, new_val in np.ndenumerate(new_values):
1244        new_deltas = {}
1245        new_grad = {}
1246        if array_mode is True:
1247            for name in new_sample_names:
1248                ens_length = d_extracted[name][0].shape[-1]
1249                new_deltas[name] = np.zeros(ens_length)
1250                for i_dat, dat in enumerate(d_extracted[name]):
1251                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1252            for name in new_cov_names:
1253                new_grad[name] = 0
1254                for i_dat, dat in enumerate(g_extracted[name]):
1255                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1256        else:
1257            for j_obs, obs in np.ndenumerate(data):
1258                for name in obs.names:
1259                    if name in obs.cov_names:
1260                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1261                    else:
1262                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1263
1264        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1265
1266        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1267            raise Exception('The same name has been used for deltas and covobs!')
1268        new_samples = []
1269        new_means = []
1270        new_idl = []
1271        new_names_obs = []
1272        for name in new_names:
1273            if name not in new_covobs:
1274                if is_merged[name]:
1275                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1276                else:
1277                    filtered_deltas = new_deltas[name]
1278                    filtered_idl_d = new_idl_d[name]
1279
1280                new_samples.append(filtered_deltas)
1281                new_idl.append(filtered_idl_d)
1282                new_means.append(new_r_values[name][i_val])
1283                new_names_obs.append(name)
1284        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1285        for name in new_covobs:
1286            final_result[i_val].names.append(name)
1287        final_result[i_val]._covobs = new_covobs
1288        final_result[i_val]._value = new_val
1289        final_result[i_val].is_merged = is_merged
1290        final_result[i_val].reweighted = reweighted
1291
1292    if multi == 0:
1293        final_result = final_result.item()
1294
1295    return final_result
1296
1297
1298def _reduce_deltas(deltas, idx_old, idx_new):
1299    """Extract deltas defined on idx_old on all configs of idx_new.
1300
1301    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1302    are ordered in an ascending order.
1303
1304    Parameters
1305    ----------
1306    deltas : list
1307        List of fluctuations
1308    idx_old : list
1309        List or range of configs on which the deltas are defined
1310    idx_new : list
1311        List of configs for which we want to extract the deltas.
1312        Has to be a subset of idx_old.
1313    """
1314    if not len(deltas) == len(idx_old):
1315        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1316    if type(idx_old) is range and type(idx_new) is range:
1317        if idx_old == idx_new:
1318            return deltas
1319    shape = len(idx_new)
1320    ret = np.zeros(shape)
1321    oldpos = 0
1322    for i in range(shape):
1323        pos = -1
1324        for j in range(oldpos, len(idx_old)):
1325            if idx_old[j] == idx_new[i]:
1326                pos = j
1327                break
1328        if pos < 0:
1329            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
1330        ret[i] = deltas[pos]
1331        oldpos = pos
1332    return np.array(ret)
1333
1334
1335def reweight(weight, obs, **kwargs):
1336    """Reweight a list of observables.
1337
1338    Parameters
1339    ----------
1340    weight : Obs
1341        Reweighting factor. An Observable that has to be defined on a superset of the
1342        configurations in obs[i].idl for all i.
1343    obs : list
1344        list of Obs, e.g. [obs1, obs2, obs3].
1345    all_configs : bool
1346        if True, the reweighted observables are normalized by the average of
1347        the reweighting factor on all configurations in weight.idl and not
1348        on the configurations in obs[i].idl. Default False.
1349    """
1350    result = []
1351    for i in range(len(obs)):
1352        if len(obs[i].cov_names):
1353            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1354        if not set(obs[i].names).issubset(weight.names):
1355            raise Exception('Error: Ensembles do not fit')
1356        for name in obs[i].names:
1357            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1358                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1359        new_samples = []
1360        w_deltas = {}
1361        for name in sorted(obs[i].names):
1362            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1363            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1364        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1365
1366        if kwargs.get('all_configs'):
1367            new_weight = weight
1368        else:
1369            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1370
1371        result.append(tmp_obs / new_weight)
1372        result[-1].reweighted = True
1373        result[-1].is_merged = obs[i].is_merged
1374
1375    return result
1376
1377
1378def correlate(obs_a, obs_b):
1379    """Correlate two observables.
1380
1381    Parameters
1382    ----------
1383    obs_a : Obs
1384        First observable
1385    obs_b : Obs
1386        Second observable
1387
1388    Notes
1389    -----
1390    Keep in mind to only correlate primary observables which have not been reweighted
1391    yet. The reweighting has to be applied after correlating the observables.
1392    Currently only works if ensembles are identical (this is not strictly necessary).
1393    """
1394
1395    if sorted(obs_a.names) != sorted(obs_b.names):
1396        raise Exception('Ensembles do not fit')
1397    if len(obs_a.cov_names) or len(obs_b.cov_names):
1398        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1399    for name in obs_a.names:
1400        if obs_a.shape[name] != obs_b.shape[name]:
1401            raise Exception('Shapes of ensemble', name, 'do not fit')
1402        if obs_a.idl[name] != obs_b.idl[name]:
1403            raise Exception('idl of ensemble', name, 'do not fit')
1404
1405    if obs_a.reweighted is True:
1406        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1407    if obs_b.reweighted is True:
1408        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1409
1410    new_samples = []
1411    new_idl = []
1412    for name in sorted(obs_a.names):
1413        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1414        new_idl.append(obs_a.idl[name])
1415
1416    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1417    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1418    o.reweighted = obs_a.reweighted or obs_b.reweighted
1419    return o
1420
1421
1422def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1423    r'''Calculates the error covariance matrix of a set of observables.
1424
1425    The gamma method has to be applied first to all observables.
1426
1427    Parameters
1428    ----------
1429    obs : list or numpy.ndarray
1430        List or one dimensional array of Obs
1431    visualize : bool
1432        If True plots the corresponding normalized correlation matrix (default False).
1433    correlation : bool
1434        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1435    smooth : None or int
1436        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1437        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1438        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1439        small ones.
1440
1441    Notes
1442    -----
1443    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1444    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1445    in the absence of autocorrelation.
1446    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1447    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1448    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1449    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1450    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1451    '''
1452
1453    length = len(obs)
1454
1455    max_samples = np.max([o.N for o in obs])
1456    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1457        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1458
1459    cov = np.zeros((length, length))
1460    for i in range(length):
1461        for j in range(i, length):
1462            cov[i, j] = _covariance_element(obs[i], obs[j])
1463    cov = cov + cov.T - np.diag(np.diag(cov))
1464
1465    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1466
1467    if isinstance(smooth, int):
1468        corr = _smooth_eigenvalues(corr, smooth)
1469
1470    if visualize:
1471        plt.matshow(corr, vmin=-1, vmax=1)
1472        plt.set_cmap('RdBu')
1473        plt.colorbar()
1474        plt.draw()
1475
1476    if correlation is True:
1477        return corr
1478
1479    errors = [o.dvalue for o in obs]
1480    cov = np.diag(errors) @ corr @ np.diag(errors)
1481
1482    eigenvalues = np.linalg.eigh(cov)[0]
1483    if not np.all(eigenvalues >= 0):
1484        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1485
1486    return cov
1487
1488
1489def _smooth_eigenvalues(corr, E):
1490    """Eigenvalue smoothing as described in hep-lat/9412087
1491
1492    corr : np.ndarray
1493        correlation matrix
1494    E : integer
1495        Number of eigenvalues to be left substantially unchanged
1496    """
1497    if not (2 < E < corr.shape[0] - 1):
1498        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1499    vals, vec = np.linalg.eigh(corr)
1500    lambda_min = np.mean(vals[:-E])
1501    vals[vals < lambda_min] = lambda_min
1502    vals /= np.mean(vals)
1503    return vec @ np.diag(vals) @ vec.T
1504
1505
1506def _covariance_element(obs1, obs2):
1507    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1508
1509    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1510        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
1511        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
1512        return np.sum(deltas1 * deltas2)
1513
1514    if set(obs1.names).isdisjoint(set(obs2.names)):
1515        return 0.0
1516
1517    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1518        raise Exception('The gamma method has to be applied to both Obs first.')
1519
1520    dvalue = 0.0
1521
1522    for e_name in obs1.mc_names:
1523
1524        if e_name not in obs2.mc_names:
1525            continue
1526
1527        idl_d = {}
1528        for r_name in obs1.e_content[e_name]:
1529            if r_name not in obs2.e_content[e_name]:
1530                continue
1531            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1532
1533        gamma = 0.0
1534
1535        for r_name in obs1.e_content[e_name]:
1536            if r_name not in obs2.e_content[e_name]:
1537                continue
1538            if len(idl_d[r_name]) == 0:
1539                continue
1540            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1541
1542        if gamma == 0.0:
1543            continue
1544
1545        gamma_div = 0.0
1546        for r_name in obs1.e_content[e_name]:
1547            if r_name not in obs2.e_content[e_name]:
1548                continue
1549            if len(idl_d[r_name]) == 0:
1550                continue
1551            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1552        gamma /= gamma_div
1553
1554        dvalue += gamma
1555
1556    for e_name in obs1.cov_names:
1557
1558        if e_name not in obs2.cov_names:
1559            continue
1560
1561        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1562
1563    return dvalue
1564
1565
1566def import_jackknife(jacks, name, idl=None):
1567    """Imports jackknife samples and returns an Obs
1568
1569    Parameters
1570    ----------
1571    jacks : numpy.ndarray
1572        numpy array containing the mean value as zeroth entry and
1573        the N jackknife samples as first to Nth entry.
1574    name : str
1575        name of the ensemble the samples are defined on.
1576    """
1577    length = len(jacks) - 1
1578    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1579    samples = jacks[1:] @ prj
1580    mean = np.mean(samples)
1581    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1582    new_obs._value = jacks[0]
1583    return new_obs
1584
1585
1586def merge_obs(list_of_obs):
1587    """Combine all observables in list_of_obs into one new observable
1588
1589    Parameters
1590    ----------
1591    list_of_obs : list
1592        list of the Obs object to be combined
1593
1594    Notes
1595    -----
1596    It is not possible to combine obs which are based on the same replicum
1597    """
1598    replist = [item for obs in list_of_obs for item in obs.names]
1599    if (len(replist) == len(set(replist))) is False:
1600        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1601    if any([len(o.cov_names) for o in list_of_obs]):
1602        raise Exception('Not possible to merge data that contains covobs!')
1603    new_dict = {}
1604    idl_dict = {}
1605    for o in list_of_obs:
1606        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1607                        for key in set(o.deltas) | set(o.r_values)})
1608        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1609
1610    names = sorted(new_dict.keys())
1611    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1612    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1613    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1614    return o
1615
1616
1617def cov_Obs(means, cov, name, grad=None):
1618    """Create an Obs based on mean(s) and a covariance matrix
1619
1620    Parameters
1621    ----------
1622    mean : list of floats or float
1623        N mean value(s) of the new Obs
1624    cov : list or array
1625        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1626    name : str
1627        identifier for the covariance matrix
1628    grad : list or array
1629        Gradient of the Covobs wrt. the means belonging to cov.
1630    """
1631
1632    def covobs_to_obs(co):
1633        """Make an Obs out of a Covobs
1634
1635        Parameters
1636        ----------
1637        co : Covobs
1638            Covobs to be embedded into the Obs
1639        """
1640        o = Obs([], [], means=[])
1641        o._value = co.value
1642        o.names.append(co.name)
1643        o._covobs[co.name] = co
1644        o._dvalue = np.sqrt(co.errsq())
1645        return o
1646
1647    ol = []
1648    if isinstance(means, (float, int)):
1649        means = [means]
1650
1651    for i in range(len(means)):
1652        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1653    if ol[0].covobs[name].N != len(means):
1654        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1655    if len(ol) == 1:
1656        return ol[0]
1657    return ol
class Obs:
 16class Obs:
 17    """Class for a general observable.
 18
 19    Instances of Obs are the basic objects of a pyerrors error analysis.
 20    They are initialized with a list which contains arrays of samples for
 21    different ensembles/replica and another list of same length which contains
 22    the names of the ensembles/replica. Mathematical operations can be
 23    performed on instances. The result is another instance of Obs. The error of
 24    an instance can be computed with the gamma_method. Also contains additional
 25    methods for output and visualization of the error calculation.
 26
 27    Attributes
 28    ----------
 29    S_global : float
 30        Standard value for S (default 2.0)
 31    S_dict : dict
 32        Dictionary for S values. If an entry for a given ensemble
 33        exists this overwrites the standard value for that ensemble.
 34    tau_exp_global : float
 35        Standard value for tau_exp (default 0.0)
 36    tau_exp_dict : dict
 37        Dictionary for tau_exp values. If an entry for a given ensemble exists
 38        this overwrites the standard value for that ensemble.
 39    N_sigma_global : float
 40        Standard value for N_sigma (default 1.0)
 41    N_sigma_dict : dict
 42        Dictionary for N_sigma values. If an entry for a given ensemble exists
 43        this overwrites the standard value for that ensemble.
 44    """
 45    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 46                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 47                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 48                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 49                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 50
 51    S_global = 2.0
 52    S_dict = {}
 53    tau_exp_global = 0.0
 54    tau_exp_dict = {}
 55    N_sigma_global = 1.0
 56    N_sigma_dict = {}
 57    filter_eps = 1e-10
 58
 59    def __init__(self, samples, names, idl=None, **kwargs):
 60        """ Initialize Obs object.
 61
 62        Parameters
 63        ----------
 64        samples : list
 65            list of numpy arrays containing the Monte Carlo samples
 66        names : list
 67            list of strings labeling the individual samples
 68        idl : list, optional
 69            list of ranges or lists on which the samples are defined
 70        """
 71
 72        if kwargs.get("means") is None and len(samples):
 73            if len(samples) != len(names):
 74                raise Exception('Length of samples and names incompatible.')
 75            if idl is not None:
 76                if len(idl) != len(names):
 77                    raise Exception('Length of idl incompatible with samples and names.')
 78            name_length = len(names)
 79            if name_length > 1:
 80                if name_length != len(set(names)):
 81                    raise Exception('names are not unique.')
 82                if not all(isinstance(x, str) for x in names):
 83                    raise TypeError('All names have to be strings.')
 84            else:
 85                if not isinstance(names[0], str):
 86                    raise TypeError('All names have to be strings.')
 87            if min(len(x) for x in samples) <= 4:
 88                raise Exception('Samples have to have at least 5 entries.')
 89
 90        self.names = sorted(names)
 91        self.shape = {}
 92        self.r_values = {}
 93        self.deltas = {}
 94        self._covobs = {}
 95
 96        self._value = 0
 97        self.N = 0
 98        self.is_merged = {}
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise Exception("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise Exception('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None
140
141    @property
142    def value(self):
143        return self._value
144
145    @property
146    def dvalue(self):
147        return self._dvalue
148
149    @property
150    def e_names(self):
151        return sorted(set([o.split('|')[0] for o in self.names]))
152
153    @property
154    def cov_names(self):
155        return sorted(set([o for o in self.covobs.keys()]))
156
157    @property
158    def mc_names(self):
159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
160
161    @property
162    def e_content(self):
163        res = {}
164        for e, e_name in enumerate(self.e_names):
165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
166            if e_name in self.names:
167                res[e_name].append(e_name)
168        return res
169
170    @property
171    def covobs(self):
172        return self._covobs
173
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            r_length = []
241            for r_name in e_content[e_name]:
242                if isinstance(self.idl[r_name], range):
243                    r_length.append(len(self.idl[r_name]))
244                else:
245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
246
247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
248            w_max = max(r_length) // 2
249            e_gamma[e_name] = np.zeros(w_max)
250            self.e_rho[e_name] = np.zeros(w_max)
251            self.e_drho[e_name] = np.zeros(w_max)
252
253            for r_name in e_content[e_name]:
254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
255
256            gamma_div = np.zeros(w_max)
257            for r_name in e_content[e_name]:
258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
259            gamma_div[gamma_div < 1] = 1.0
260            e_gamma[e_name] /= gamma_div[:w_max]
261
262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
263                self.e_tauint[e_name] = 0.5
264                self.e_dtauint[e_name] = 0.0
265                self.e_dvalue[e_name] = 0.0
266                self.e_ddvalue[e_name] = 0.0
267                self.e_windowsize[e_name] = 0
268                continue
269
270            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
271            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
272            # Make sure no entry of tauint is smaller than 0.5
273            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
274            # hep-lat/0306017 eq. (42)
275            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
276            self.e_n_dtauint[e_name][0] = 0.0
277
278            def _compute_drho(i):
279                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
280                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
281
282            _compute_drho(1)
283            if self.tau_exp[e_name] > 0:
284                texp = self.tau_exp[e_name]
285                # Critical slowing down analysis
286                if w_max // 2 <= 1:
287                    raise Exception("Need at least 8 samples for tau_exp error analysis")
288                for n in range(1, w_max // 2):
289                    _compute_drho(n + 1)
290                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
291                        # Bias correction hep-lat/0306017 eq. (49) included
292                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
293                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
294                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
295                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
296                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
297                        self.e_windowsize[e_name] = n
298                        break
299            else:
300                if self.S[e_name] == 0.0:
301                    self.e_tauint[e_name] = 0.5
302                    self.e_dtauint[e_name] = 0.0
303                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
304                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
305                    self.e_windowsize[e_name] = 0
306                else:
307                    # Standard automatic windowing procedure
308                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
309                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
310                    for n in range(1, w_max):
311                        if n < w_max // 2 - 2:
312                            _compute_drho(n + 1)
313                        if g_w[n - 1] < 0 or n >= w_max - 1:
314                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
315                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
316                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
317                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
318                            self.e_windowsize[e_name] = n
319                            break
320
321            self._dvalue += self.e_dvalue[e_name] ** 2
322            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
323
324        for e_name in self.cov_names:
325            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
326            self.e_ddvalue[e_name] = 0
327            self._dvalue += self.e_dvalue[e_name]**2
328
329        self._dvalue = np.sqrt(self._dvalue)
330        if self._dvalue == 0.0:
331            self.ddvalue = 0.0
332        else:
333            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
334        return
335
336    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
337        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
338           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
339
340        Parameters
341        ----------
342        deltas : list
343            List of fluctuations
344        idx : list
345            List or range of configurations on which the deltas are defined.
346        shape : int
347            Number of configurations in idx.
348        w_max : int
349            Upper bound for the summation window.
350        fft : bool
351            determines whether the fft algorithm is used for the computation
352            of the autocorrelation function.
353        """
354        gamma = np.zeros(w_max)
355        deltas = _expand_deltas(deltas, idx, shape)
356        new_shape = len(deltas)
357        if fft:
358            max_gamma = min(new_shape, w_max)
359            # The padding for the fft has to be even
360            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
361            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
362        else:
363            for n in range(w_max):
364                if new_shape - n >= 0:
365                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
366
367        return gamma
368
369    def details(self, ens_content=True):
370        """Output detailed properties of the Obs.
371
372        Parameters
373        ----------
374        ens_content : bool
375            print details about the ensembles and replica if true.
376        """
377        if self.tag is not None:
378            print("Description:", self.tag)
379        if not hasattr(self, 'e_dvalue'):
380            print('Result\t %3.8e' % (self.value))
381        else:
382            if self.value == 0.0:
383                percentage = np.nan
384            else:
385                percentage = np.abs(self._dvalue / self.value) * 100
386            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
387            if len(self.e_names) > 1:
388                print(' Ensemble errors:')
389            for e_name in self.mc_names:
390                if len(self.e_names) > 1:
391                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
392                if self.tau_exp[e_name] > 0:
393                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
394                else:
395                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
396            for e_name in self.cov_names:
397                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
398        if ens_content is True:
399            if len(self.e_names) == 1:
400                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
401            else:
402                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
403            my_string_list = []
404            for key, value in sorted(self.e_content.items()):
405                if key not in self.covobs:
406                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
407                    if len(value) == 1:
408                        my_string += f': {self.shape[value[0]]} configurations'
409                        if isinstance(self.idl[value[0]], range):
410                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
411                        else:
412                            my_string += ' (irregular range)'
413                    else:
414                        sublist = []
415                        for v in value:
416                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
417                            my_substring += f': {self.shape[v]} configurations'
418                            if isinstance(self.idl[v], range):
419                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
420                            else:
421                                my_substring += ' (irregular range)'
422                            sublist.append(my_substring)
423
424                        my_string += '\n' + '\n'.join(sublist)
425                else:
426                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
427                my_string_list.append(my_string)
428            print('\n'.join(my_string_list))
429
430    def reweight(self, weight):
431        """Reweight the obs with given rewighting factors.
432
433        Parameters
434        ----------
435        weight : Obs
436            Reweighting factor. An Observable that has to be defined on a superset of the
437            configurations in obs[i].idl for all i.
438        all_configs : bool
439            if True, the reweighted observables are normalized by the average of
440            the reweighting factor on all configurations in weight.idl and not
441            on the configurations in obs[i].idl. Default False.
442        """
443        return reweight(weight, [self])[0]
444
445    def is_zero_within_error(self, sigma=1):
446        """Checks whether the observable is zero within 'sigma' standard errors.
447
448        Parameters
449        ----------
450        sigma : int
451            Number of standard errors used for the check.
452
453        Works only properly when the gamma method was run.
454        """
455        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
456
457    def is_zero(self, atol=1e-10):
458        """Checks whether the observable is zero within a given tolerance.
459
460        Parameters
461        ----------
462        atol : float
463            Absolute tolerance (for details see numpy documentation).
464        """
465        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
466
467    def plot_tauint(self, save=None):
468        """Plot integrated autocorrelation time for each ensemble.
469
470        Parameters
471        ----------
472        save : str
473            saves the figure to a file named 'save' if.
474        """
475        if not hasattr(self, 'e_dvalue'):
476            raise Exception('Run the gamma method first.')
477
478        for e, e_name in enumerate(self.mc_names):
479            fig = plt.figure()
480            plt.xlabel(r'$W$')
481            plt.ylabel(r'$\tau_\mathrm{int}$')
482            length = int(len(self.e_n_tauint[e_name]))
483            if self.tau_exp[e_name] > 0:
484                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
485                x_help = np.arange(2 * self.tau_exp[e_name])
486                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
487                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
488                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
489                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
490                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
491                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
492                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
493            else:
494                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
495                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
496
497            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
498            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
499            plt.legend()
500            plt.xlim(-0.5, xmax)
501            ylim = plt.ylim()
502            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
503            plt.draw()
504            if save:
505                fig.savefig(save + "_" + str(e))
506
507    def plot_rho(self, save=None):
508        """Plot normalized autocorrelation function time for each ensemble.
509
510        Parameters
511        ----------
512        save : str
513            saves the figure to a file named 'save' if.
514        """
515        if not hasattr(self, 'e_dvalue'):
516            raise Exception('Run the gamma method first.')
517        for e, e_name in enumerate(self.mc_names):
518            fig = plt.figure()
519            plt.xlabel('W')
520            plt.ylabel('rho')
521            length = int(len(self.e_drho[e_name]))
522            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
523            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
524            if self.tau_exp[e_name] > 0:
525                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
526                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
527                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
528                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
529            else:
530                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
531                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
532            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
533            plt.xlim(-0.5, xmax)
534            plt.draw()
535            if save:
536                fig.savefig(save + "_" + str(e))
537
538    def plot_rep_dist(self):
539        """Plot replica distribution for each ensemble with more than one replicum."""
540        if not hasattr(self, 'e_dvalue'):
541            raise Exception('Run the gamma method first.')
542        for e, e_name in enumerate(self.mc_names):
543            if len(self.e_content[e_name]) == 1:
544                print('No replica distribution for a single replicum (', e_name, ')')
545                continue
546            r_length = []
547            sub_r_mean = 0
548            for r, r_name in enumerate(self.e_content[e_name]):
549                r_length.append(len(self.deltas[r_name]))
550                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
551            e_N = np.sum(r_length)
552            sub_r_mean /= e_N
553            arr = np.zeros(len(self.e_content[e_name]))
554            for r, r_name in enumerate(self.e_content[e_name]):
555                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
556            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
557            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
558            plt.draw()
559
560    def plot_history(self, expand=True):
561        """Plot derived Monte Carlo history for each ensemble
562
563        Parameters
564        ----------
565        expand : bool
566            show expanded history for irregular Monte Carlo chains (default: True).
567        """
568        for e, e_name in enumerate(self.mc_names):
569            plt.figure()
570            r_length = []
571            tmp = []
572            tmp_expanded = []
573            for r, r_name in enumerate(self.e_content[e_name]):
574                tmp.append(self.deltas[r_name] + self.r_values[r_name])
575                if expand:
576                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
577                    r_length.append(len(tmp_expanded[-1]))
578                else:
579                    r_length.append(len(tmp[-1]))
580            e_N = np.sum(r_length)
581            x = np.arange(e_N)
582            y_test = np.concatenate(tmp, axis=0)
583            if expand:
584                y = np.concatenate(tmp_expanded, axis=0)
585            else:
586                y = y_test
587            plt.errorbar(x, y, fmt='.', markersize=3)
588            plt.xlim(-0.5, e_N - 0.5)
589            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
590            plt.draw()
591
592    def plot_piechart(self, save=None):
593        """Plot piechart which shows the fractional contribution of each
594        ensemble to the error and returns a dictionary containing the fractions.
595
596        Parameters
597        ----------
598        save : str
599            saves the figure to a file named 'save' if.
600        """
601        if not hasattr(self, 'e_dvalue'):
602            raise Exception('Run the gamma method first.')
603        if np.isclose(0.0, self._dvalue, atol=1e-15):
604            raise Exception('Error is 0.0')
605        labels = self.e_names
606        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
607        fig1, ax1 = plt.subplots()
608        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
609        ax1.axis('equal')
610        plt.draw()
611        if save:
612            fig1.savefig(save)
613
614        return dict(zip(self.e_names, sizes))
615
616    def dump(self, filename, datatype="json.gz", description="", **kwargs):
617        """Dump the Obs to a file 'name' of chosen format.
618
619        Parameters
620        ----------
621        filename : str
622            name of the file to be saved.
623        datatype : str
624            Format of the exported file. Supported formats include
625            "json.gz" and "pickle"
626        description : str
627            Description for output file, only relevant for json.gz format.
628        path : str
629            specifies a custom path for the file (default '.')
630        """
631        if 'path' in kwargs:
632            file_name = kwargs.get('path') + '/' + filename
633        else:
634            file_name = filename
635
636        if datatype == "json.gz":
637            from .input.json import dump_to_json
638            dump_to_json([self], file_name, description=description)
639        elif datatype == "pickle":
640            with open(file_name + '.p', 'wb') as fb:
641                pickle.dump(self, fb)
642        else:
643            raise Exception("Unknown datatype " + str(datatype))
644
645    def export_jackknife(self):
646        """Export jackknife samples from the Obs
647
648        Returns
649        -------
650        numpy.ndarray
651            Returns a numpy array of length N + 1 where N is the number of samples
652            for the given ensemble and replicum. The zeroth entry of the array contains
653            the mean value of the Obs, entries 1 to N contain the N jackknife samples
654            derived from the Obs. The current implementation only works for observables
655            defined on exactly one ensemble and replicum. The derived jackknife samples
656            should agree with samples from a full jackknife analysis up to O(1/N).
657        """
658
659        if len(self.names) != 1:
660            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
661
662        name = self.names[0]
663        full_data = self.deltas[name] + self.r_values[name]
664        n = full_data.size
665        mean = self.value
666        tmp_jacks = np.zeros(n + 1)
667        tmp_jacks[0] = mean
668        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
669        return tmp_jacks
670
671    def __float__(self):
672        return float(self.value)
673
674    def __repr__(self):
675        return 'Obs[' + str(self) + ']'
676
677    def __str__(self):
678        if self._dvalue == 0.0:
679            return str(self.value)
680        fexp = np.floor(np.log10(self._dvalue))
681        if fexp < 0.0:
682            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
683        elif fexp == 0.0:
684            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
685        else:
686            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
687
688    # Overload comparisons
689    def __lt__(self, other):
690        return self.value < other
691
692    def __le__(self, other):
693        return self.value <= other
694
695    def __gt__(self, other):
696        return self.value > other
697
698    def __ge__(self, other):
699        return self.value >= other
700
701    def __eq__(self, other):
702        return (self - other).is_zero()
703
704    def __ne__(self, other):
705        return not (self - other).is_zero()
706
707    # Overload math operations
708    def __add__(self, y):
709        if isinstance(y, Obs):
710            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
711        else:
712            if isinstance(y, np.ndarray):
713                return np.array([self + o for o in y])
714            elif y.__class__.__name__ in ['Corr', 'CObs']:
715                return NotImplemented
716            else:
717                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
718
719    def __radd__(self, y):
720        return self + y
721
722    def __mul__(self, y):
723        if isinstance(y, Obs):
724            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
725        else:
726            if isinstance(y, np.ndarray):
727                return np.array([self * o for o in y])
728            elif isinstance(y, complex):
729                return CObs(self * y.real, self * y.imag)
730            elif y.__class__.__name__ in ['Corr', 'CObs']:
731                return NotImplemented
732            else:
733                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
734
735    def __rmul__(self, y):
736        return self * y
737
738    def __sub__(self, y):
739        if isinstance(y, Obs):
740            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
741        else:
742            if isinstance(y, np.ndarray):
743                return np.array([self - o for o in y])
744            elif y.__class__.__name__ in ['Corr', 'CObs']:
745                return NotImplemented
746            else:
747                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
748
749    def __rsub__(self, y):
750        return -1 * (self - y)
751
752    def __pos__(self):
753        return self
754
755    def __neg__(self):
756        return -1 * self
757
758    def __truediv__(self, y):
759        if isinstance(y, Obs):
760            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
761        else:
762            if isinstance(y, np.ndarray):
763                return np.array([self / o for o in y])
764            elif y.__class__.__name__ in ['Corr', 'CObs']:
765                return NotImplemented
766            else:
767                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
768
769    def __rtruediv__(self, y):
770        if isinstance(y, Obs):
771            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
772        else:
773            if isinstance(y, np.ndarray):
774                return np.array([o / self for o in y])
775            elif y.__class__.__name__ in ['Corr', 'CObs']:
776                return NotImplemented
777            else:
778                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
779
780    def __pow__(self, y):
781        if isinstance(y, Obs):
782            return derived_observable(lambda x: x[0] ** x[1], [self, y])
783        else:
784            return derived_observable(lambda x: x[0] ** y, [self])
785
786    def __rpow__(self, y):
787        if isinstance(y, Obs):
788            return derived_observable(lambda x: x[0] ** x[1], [y, self])
789        else:
790            return derived_observable(lambda x: y ** x[0], [self])
791
792    def __abs__(self):
793        return derived_observable(lambda x: anp.abs(x[0]), [self])
794
795    # Overload numpy functions
796    def sqrt(self):
797        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
798
799    def log(self):
800        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
801
802    def exp(self):
803        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
804
805    def sin(self):
806        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
807
808    def cos(self):
809        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
810
811    def tan(self):
812        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
813
814    def arcsin(self):
815        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
816
817    def arccos(self):
818        return derived_observable(lambda x: anp.arccos(x[0]), [self])
819
820    def arctan(self):
821        return derived_observable(lambda x: anp.arctan(x[0]), [self])
822
823    def sinh(self):
824        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
825
826    def cosh(self):
827        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
828
829    def tanh(self):
830        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
831
832    def arcsinh(self):
833        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
834
835    def arccosh(self):
836        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
837
838    def arctanh(self):
839        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 59    def __init__(self, samples, names, idl=None, **kwargs):
 60        """ Initialize Obs object.
 61
 62        Parameters
 63        ----------
 64        samples : list
 65            list of numpy arrays containing the Monte Carlo samples
 66        names : list
 67            list of strings labeling the individual samples
 68        idl : list, optional
 69            list of ranges or lists on which the samples are defined
 70        """
 71
 72        if kwargs.get("means") is None and len(samples):
 73            if len(samples) != len(names):
 74                raise Exception('Length of samples and names incompatible.')
 75            if idl is not None:
 76                if len(idl) != len(names):
 77                    raise Exception('Length of idl incompatible with samples and names.')
 78            name_length = len(names)
 79            if name_length > 1:
 80                if name_length != len(set(names)):
 81                    raise Exception('names are not unique.')
 82                if not all(isinstance(x, str) for x in names):
 83                    raise TypeError('All names have to be strings.')
 84            else:
 85                if not isinstance(names[0], str):
 86                    raise TypeError('All names have to be strings.')
 87            if min(len(x) for x in samples) <= 4:
 88                raise Exception('Samples have to have at least 5 entries.')
 89
 90        self.names = sorted(names)
 91        self.shape = {}
 92        self.r_values = {}
 93        self.deltas = {}
 94        self._covobs = {}
 95
 96        self._value = 0
 97        self.N = 0
 98        self.is_merged = {}
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise Exception("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise Exception('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
filter_eps = 1e-10
names
shape
r_values
deltas
N
is_merged
idl
ddvalue
reweighted
tag
value
dvalue
e_names
cov_names
mc_names
e_content
covobs
def gamma_method(self, **kwargs)
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            r_length = []
241            for r_name in e_content[e_name]:
242                if isinstance(self.idl[r_name], range):
243                    r_length.append(len(self.idl[r_name]))
244                else:
245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
246
247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
248            w_max = max(r_length) // 2
249            e_gamma[e_name] = np.zeros(w_max)
250            self.e_rho[e_name] = np.zeros(w_max)
251            self.e_drho[e_name] = np.zeros(w_max)
252
253            for r_name in e_content[e_name]:
254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
255
256            gamma_div = np.zeros(w_max)
257            for r_name in e_content[e_name]:
258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
259            gamma_div[gamma_div < 1] = 1.0
260            e_gamma[e_name] /= gamma_div[:w_max]
261
262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
263                self.e_tauint[e_name] = 0.5
264                self.e_dtauint[e_name] = 0.0
265                self.e_dvalue[e_name] = 0.0
266                self.e_ddvalue[e_name] = 0.0
267                self.e_windowsize[e_name] = 0
268                continue
269
270            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
271            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
272            # Make sure no entry of tauint is smaller than 0.5
273            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
274            # hep-lat/0306017 eq. (42)
275            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
276            self.e_n_dtauint[e_name][0] = 0.0
277
278            def _compute_drho(i):
279                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
280                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
281
282            _compute_drho(1)
283            if self.tau_exp[e_name] > 0:
284                texp = self.tau_exp[e_name]
285                # Critical slowing down analysis
286                if w_max // 2 <= 1:
287                    raise Exception("Need at least 8 samples for tau_exp error analysis")
288                for n in range(1, w_max // 2):
289                    _compute_drho(n + 1)
290                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
291                        # Bias correction hep-lat/0306017 eq. (49) included
292                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
293                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
294                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
295                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
296                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
297                        self.e_windowsize[e_name] = n
298                        break
299            else:
300                if self.S[e_name] == 0.0:
301                    self.e_tauint[e_name] = 0.5
302                    self.e_dtauint[e_name] = 0.0
303                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
304                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
305                    self.e_windowsize[e_name] = 0
306                else:
307                    # Standard automatic windowing procedure
308                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
309                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
310                    for n in range(1, w_max):
311                        if n < w_max // 2 - 2:
312                            _compute_drho(n + 1)
313                        if g_w[n - 1] < 0 or n >= w_max - 1:
314                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
315                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
316                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
317                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
318                            self.e_windowsize[e_name] = n
319                            break
320
321            self._dvalue += self.e_dvalue[e_name] ** 2
322            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
323
324        for e_name in self.cov_names:
325            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
326            self.e_ddvalue[e_name] = 0
327            self._dvalue += self.e_dvalue[e_name]**2
328
329        self._dvalue = np.sqrt(self._dvalue)
330        if self._dvalue == 0.0:
331            self.ddvalue = 0.0
332        else:
333            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
334        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True)
369    def details(self, ens_content=True):
370        """Output detailed properties of the Obs.
371
372        Parameters
373        ----------
374        ens_content : bool
375            print details about the ensembles and replica if true.
376        """
377        if self.tag is not None:
378            print("Description:", self.tag)
379        if not hasattr(self, 'e_dvalue'):
380            print('Result\t %3.8e' % (self.value))
381        else:
382            if self.value == 0.0:
383                percentage = np.nan
384            else:
385                percentage = np.abs(self._dvalue / self.value) * 100
386            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
387            if len(self.e_names) > 1:
388                print(' Ensemble errors:')
389            for e_name in self.mc_names:
390                if len(self.e_names) > 1:
391                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
392                if self.tau_exp[e_name] > 0:
393                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
394                else:
395                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
396            for e_name in self.cov_names:
397                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
398        if ens_content is True:
399            if len(self.e_names) == 1:
400                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
401            else:
402                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
403            my_string_list = []
404            for key, value in sorted(self.e_content.items()):
405                if key not in self.covobs:
406                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
407                    if len(value) == 1:
408                        my_string += f': {self.shape[value[0]]} configurations'
409                        if isinstance(self.idl[value[0]], range):
410                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
411                        else:
412                            my_string += ' (irregular range)'
413                    else:
414                        sublist = []
415                        for v in value:
416                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
417                            my_substring += f': {self.shape[v]} configurations'
418                            if isinstance(self.idl[v], range):
419                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
420                            else:
421                                my_substring += ' (irregular range)'
422                            sublist.append(my_substring)
423
424                        my_string += '\n' + '\n'.join(sublist)
425                else:
426                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
427                my_string_list.append(my_string)
428            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight)
430    def reweight(self, weight):
431        """Reweight the obs with given rewighting factors.
432
433        Parameters
434        ----------
435        weight : Obs
436            Reweighting factor. An Observable that has to be defined on a superset of the
437            configurations in obs[i].idl for all i.
438        all_configs : bool
439            if True, the reweighted observables are normalized by the average of
440            the reweighting factor on all configurations in weight.idl and not
441            on the configurations in obs[i].idl. Default False.
442        """
443        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1)
445    def is_zero_within_error(self, sigma=1):
446        """Checks whether the observable is zero within 'sigma' standard errors.
447
448        Parameters
449        ----------
450        sigma : int
451            Number of standard errors used for the check.
452
453        Works only properly when the gamma method was run.
454        """
455        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10)
457    def is_zero(self, atol=1e-10):
458        """Checks whether the observable is zero within a given tolerance.
459
460        Parameters
461        ----------
462        atol : float
463            Absolute tolerance (for details see numpy documentation).
464        """
465        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None)
467    def plot_tauint(self, save=None):
468        """Plot integrated autocorrelation time for each ensemble.
469
470        Parameters
471        ----------
472        save : str
473            saves the figure to a file named 'save' if.
474        """
475        if not hasattr(self, 'e_dvalue'):
476            raise Exception('Run the gamma method first.')
477
478        for e, e_name in enumerate(self.mc_names):
479            fig = plt.figure()
480            plt.xlabel(r'$W$')
481            plt.ylabel(r'$\tau_\mathrm{int}$')
482            length = int(len(self.e_n_tauint[e_name]))
483            if self.tau_exp[e_name] > 0:
484                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
485                x_help = np.arange(2 * self.tau_exp[e_name])
486                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
487                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
488                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
489                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
490                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
491                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
492                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
493            else:
494                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
495                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
496
497            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
498            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
499            plt.legend()
500            plt.xlim(-0.5, xmax)
501            ylim = plt.ylim()
502            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
503            plt.draw()
504            if save:
505                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None)
507    def plot_rho(self, save=None):
508        """Plot normalized autocorrelation function time for each ensemble.
509
510        Parameters
511        ----------
512        save : str
513            saves the figure to a file named 'save' if.
514        """
515        if not hasattr(self, 'e_dvalue'):
516            raise Exception('Run the gamma method first.')
517        for e, e_name in enumerate(self.mc_names):
518            fig = plt.figure()
519            plt.xlabel('W')
520            plt.ylabel('rho')
521            length = int(len(self.e_drho[e_name]))
522            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
523            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
524            if self.tau_exp[e_name] > 0:
525                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
526                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
527                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
528                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
529            else:
530                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
531                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
532            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
533            plt.xlim(-0.5, xmax)
534            plt.draw()
535            if save:
536                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self)
538    def plot_rep_dist(self):
539        """Plot replica distribution for each ensemble with more than one replicum."""
540        if not hasattr(self, 'e_dvalue'):
541            raise Exception('Run the gamma method first.')
542        for e, e_name in enumerate(self.mc_names):
543            if len(self.e_content[e_name]) == 1:
544                print('No replica distribution for a single replicum (', e_name, ')')
545                continue
546            r_length = []
547            sub_r_mean = 0
548            for r, r_name in enumerate(self.e_content[e_name]):
549                r_length.append(len(self.deltas[r_name]))
550                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
551            e_N = np.sum(r_length)
552            sub_r_mean /= e_N
553            arr = np.zeros(len(self.e_content[e_name]))
554            for r, r_name in enumerate(self.e_content[e_name]):
555                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
556            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
557            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
558            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True)
560    def plot_history(self, expand=True):
561        """Plot derived Monte Carlo history for each ensemble
562
563        Parameters
564        ----------
565        expand : bool
566            show expanded history for irregular Monte Carlo chains (default: True).
567        """
568        for e, e_name in enumerate(self.mc_names):
569            plt.figure()
570            r_length = []
571            tmp = []
572            tmp_expanded = []
573            for r, r_name in enumerate(self.e_content[e_name]):
574                tmp.append(self.deltas[r_name] + self.r_values[r_name])
575                if expand:
576                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
577                    r_length.append(len(tmp_expanded[-1]))
578                else:
579                    r_length.append(len(tmp[-1]))
580            e_N = np.sum(r_length)
581            x = np.arange(e_N)
582            y_test = np.concatenate(tmp, axis=0)
583            if expand:
584                y = np.concatenate(tmp_expanded, axis=0)
585            else:
586                y = y_test
587            plt.errorbar(x, y, fmt='.', markersize=3)
588            plt.xlim(-0.5, e_N - 0.5)
589            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
590            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None)
592    def plot_piechart(self, save=None):
593        """Plot piechart which shows the fractional contribution of each
594        ensemble to the error and returns a dictionary containing the fractions.
595
596        Parameters
597        ----------
598        save : str
599            saves the figure to a file named 'save' if.
600        """
601        if not hasattr(self, 'e_dvalue'):
602            raise Exception('Run the gamma method first.')
603        if np.isclose(0.0, self._dvalue, atol=1e-15):
604            raise Exception('Error is 0.0')
605        labels = self.e_names
606        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
607        fig1, ax1 = plt.subplots()
608        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
609        ax1.axis('equal')
610        plt.draw()
611        if save:
612            fig1.savefig(save)
613
614        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs)
616    def dump(self, filename, datatype="json.gz", description="", **kwargs):
617        """Dump the Obs to a file 'name' of chosen format.
618
619        Parameters
620        ----------
621        filename : str
622            name of the file to be saved.
623        datatype : str
624            Format of the exported file. Supported formats include
625            "json.gz" and "pickle"
626        description : str
627            Description for output file, only relevant for json.gz format.
628        path : str
629            specifies a custom path for the file (default '.')
630        """
631        if 'path' in kwargs:
632            file_name = kwargs.get('path') + '/' + filename
633        else:
634            file_name = filename
635
636        if datatype == "json.gz":
637            from .input.json import dump_to_json
638            dump_to_json([self], file_name, description=description)
639        elif datatype == "pickle":
640            with open(file_name + '.p', 'wb') as fb:
641                pickle.dump(self, fb)
642        else:
643            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self)
645    def export_jackknife(self):
646        """Export jackknife samples from the Obs
647
648        Returns
649        -------
650        numpy.ndarray
651            Returns a numpy array of length N + 1 where N is the number of samples
652            for the given ensemble and replicum. The zeroth entry of the array contains
653            the mean value of the Obs, entries 1 to N contain the N jackknife samples
654            derived from the Obs. The current implementation only works for observables
655            defined on exactly one ensemble and replicum. The derived jackknife samples
656            should agree with samples from a full jackknife analysis up to O(1/N).
657        """
658
659        if len(self.names) != 1:
660            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
661
662        name = self.names[0]
663        full_data = self.deltas[name] + self.r_values[name]
664        n = full_data.size
665        mean = self.value
666        tmp_jacks = np.zeros(n + 1)
667        tmp_jacks[0] = mean
668        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
669        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self)
796    def sqrt(self):
797        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self)
799    def log(self):
800        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self)
802    def exp(self):
803        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self)
805    def sin(self):
806        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self)
808    def cos(self):
809        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self)
811    def tan(self):
812        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self)
814    def arcsin(self):
815        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self)
817    def arccos(self):
818        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self)
820    def arctan(self):
821        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self)
823    def sinh(self):
824        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self)
826    def cosh(self):
827        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self)
829    def tanh(self):
830        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self)
832    def arcsinh(self):
833        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self)
835    def arccosh(self):
836        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self)
838    def arctanh(self):
839        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
842class CObs:
843    """Class for a complex valued observable."""
844    __slots__ = ['_real', '_imag', 'tag']
845
846    def __init__(self, real, imag=0.0):
847        self._real = real
848        self._imag = imag
849        self.tag = None
850
851    @property
852    def real(self):
853        return self._real
854
855    @property
856    def imag(self):
857        return self._imag
858
859    def gamma_method(self, **kwargs):
860        """Executes the gamma_method for the real and the imaginary part."""
861        if isinstance(self.real, Obs):
862            self.real.gamma_method(**kwargs)
863        if isinstance(self.imag, Obs):
864            self.imag.gamma_method(**kwargs)
865
866    def is_zero(self):
867        """Checks whether both real and imaginary part are zero within machine precision."""
868        return self.real == 0.0 and self.imag == 0.0
869
870    def conjugate(self):
871        return CObs(self.real, -self.imag)
872
873    def __add__(self, other):
874        if isinstance(other, np.ndarray):
875            return other + self
876        elif hasattr(other, 'real') and hasattr(other, 'imag'):
877            return CObs(self.real + other.real,
878                        self.imag + other.imag)
879        else:
880            return CObs(self.real + other, self.imag)
881
882    def __radd__(self, y):
883        return self + y
884
885    def __sub__(self, other):
886        if isinstance(other, np.ndarray):
887            return -1 * (other - self)
888        elif hasattr(other, 'real') and hasattr(other, 'imag'):
889            return CObs(self.real - other.real, self.imag - other.imag)
890        else:
891            return CObs(self.real - other, self.imag)
892
893    def __rsub__(self, other):
894        return -1 * (self - other)
895
896    def __mul__(self, other):
897        if isinstance(other, np.ndarray):
898            return other * self
899        elif hasattr(other, 'real') and hasattr(other, 'imag'):
900            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
901                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
902                                               [self.real, other.real, self.imag, other.imag],
903                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
904                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
905                                               [self.real, other.real, self.imag, other.imag],
906                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
907            elif getattr(other, 'imag', 0) != 0:
908                return CObs(self.real * other.real - self.imag * other.imag,
909                            self.imag * other.real + self.real * other.imag)
910            else:
911                return CObs(self.real * other.real, self.imag * other.real)
912        else:
913            return CObs(self.real * other, self.imag * other)
914
915    def __rmul__(self, other):
916        return self * other
917
918    def __truediv__(self, other):
919        if isinstance(other, np.ndarray):
920            return 1 / (other / self)
921        elif hasattr(other, 'real') and hasattr(other, 'imag'):
922            r = other.real ** 2 + other.imag ** 2
923            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
924        else:
925            return CObs(self.real / other, self.imag / other)
926
927    def __rtruediv__(self, other):
928        r = self.real ** 2 + self.imag ** 2
929        if hasattr(other, 'real') and hasattr(other, 'imag'):
930            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
931        else:
932            return CObs(self.real * other / r, -self.imag * other / r)
933
934    def __abs__(self):
935        return np.sqrt(self.real**2 + self.imag**2)
936
937    def __pos__(self):
938        return self
939
940    def __neg__(self):
941        return -1 * self
942
943    def __eq__(self, other):
944        return self.real == other.real and self.imag == other.imag
945
946    def __str__(self):
947        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
948
949    def __repr__(self):
950        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
846    def __init__(self, real, imag=0.0):
847        self._real = real
848        self._imag = imag
849        self.tag = None
tag
real
imag
def gamma_method(self, **kwargs)
859    def gamma_method(self, **kwargs):
860        """Executes the gamma_method for the real and the imaginary part."""
861        if isinstance(self.real, Obs):
862            self.real.gamma_method(**kwargs)
863        if isinstance(self.imag, Obs):
864            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self)
866    def is_zero(self):
867        """Checks whether both real and imaginary part are zero within machine precision."""
868        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self)
870    def conjugate(self):
871        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs)
1119def derived_observable(func, data, array_mode=False, **kwargs):
1120    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1121
1122    Parameters
1123    ----------
1124    func : object
1125        arbitrary function of the form func(data, **kwargs). For the
1126        automatic differentiation to work, all numpy functions have to have
1127        the autograd wrapper (use 'import autograd.numpy as anp').
1128    data : list
1129        list of Obs, e.g. [obs1, obs2, obs3].
1130    num_grad : bool
1131        if True, numerical derivatives are used instead of autograd
1132        (default False). To control the numerical differentiation the
1133        kwargs of numdifftools.step_generators.MaxStepGenerator
1134        can be used.
1135    man_grad : list
1136        manually supply a list or an array which contains the jacobian
1137        of func. Use cautiously, supplying the wrong derivative will
1138        not be intercepted.
1139
1140    Notes
1141    -----
1142    For simple mathematical operations it can be practical to use anonymous
1143    functions. For the ratio of two observables one can e.g. use
1144
1145    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1146    """
1147
1148    data = np.asarray(data)
1149    raveled_data = data.ravel()
1150
1151    # Workaround for matrix operations containing non Obs data
1152    if not all(isinstance(x, Obs) for x in raveled_data):
1153        for i in range(len(raveled_data)):
1154            if isinstance(raveled_data[i], (int, float)):
1155                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1156
1157    allcov = {}
1158    for o in raveled_data:
1159        for name in o.cov_names:
1160            if name in allcov:
1161                if not np.allclose(allcov[name], o.covobs[name].cov):
1162                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1163            else:
1164                allcov[name] = o.covobs[name].cov
1165
1166    n_obs = len(raveled_data)
1167    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1168    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1169    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1170
1171    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1172    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1173
1174    if data.ndim == 1:
1175        values = np.array([o.value for o in data])
1176    else:
1177        values = np.vectorize(lambda x: x.value)(data)
1178
1179    new_values = func(values, **kwargs)
1180
1181    multi = int(isinstance(new_values, np.ndarray))
1182
1183    new_r_values = {}
1184    new_idl_d = {}
1185    for name in new_sample_names:
1186        idl = []
1187        tmp_values = np.zeros(n_obs)
1188        for i, item in enumerate(raveled_data):
1189            tmp_values[i] = item.r_values.get(name, item.value)
1190            tmp_idl = item.idl.get(name)
1191            if tmp_idl is not None:
1192                idl.append(tmp_idl)
1193        if multi > 0:
1194            tmp_values = np.array(tmp_values).reshape(data.shape)
1195        new_r_values[name] = func(tmp_values, **kwargs)
1196        new_idl_d[name] = _merge_idx(idl)
1197        if not is_merged[name]:
1198            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1199
1200    if 'man_grad' in kwargs:
1201        deriv = np.asarray(kwargs.get('man_grad'))
1202        if new_values.shape + data.shape != deriv.shape:
1203            raise Exception('Manual derivative does not have correct shape.')
1204    elif kwargs.get('num_grad') is True:
1205        if multi > 0:
1206            raise Exception('Multi mode currently not supported for numerical derivative')
1207        options = {
1208            'base_step': 0.1,
1209            'step_ratio': 2.5}
1210        for key in options.keys():
1211            kwarg = kwargs.get(key)
1212            if kwarg is not None:
1213                options[key] = kwarg
1214        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1215        if tmp_df.size == 1:
1216            deriv = np.array([tmp_df.real])
1217        else:
1218            deriv = tmp_df.real
1219    else:
1220        deriv = jacobian(func)(values, **kwargs)
1221
1222    final_result = np.zeros(new_values.shape, dtype=object)
1223
1224    if array_mode is True:
1225
1226        class _Zero_grad():
1227            def __init__(self, N):
1228                self.grad = np.zeros((N, 1))
1229
1230        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1231        d_extracted = {}
1232        g_extracted = {}
1233        for name in new_sample_names:
1234            d_extracted[name] = []
1235            ens_length = len(new_idl_d[name])
1236            for i_dat, dat in enumerate(data):
1237                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1238        for name in new_cov_names:
1239            g_extracted[name] = []
1240            zero_grad = _Zero_grad(new_covobs_lengths[name])
1241            for i_dat, dat in enumerate(data):
1242                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1243
1244    for i_val, new_val in np.ndenumerate(new_values):
1245        new_deltas = {}
1246        new_grad = {}
1247        if array_mode is True:
1248            for name in new_sample_names:
1249                ens_length = d_extracted[name][0].shape[-1]
1250                new_deltas[name] = np.zeros(ens_length)
1251                for i_dat, dat in enumerate(d_extracted[name]):
1252                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1253            for name in new_cov_names:
1254                new_grad[name] = 0
1255                for i_dat, dat in enumerate(g_extracted[name]):
1256                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1257        else:
1258            for j_obs, obs in np.ndenumerate(data):
1259                for name in obs.names:
1260                    if name in obs.cov_names:
1261                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1262                    else:
1263                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1264
1265        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1266
1267        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1268            raise Exception('The same name has been used for deltas and covobs!')
1269        new_samples = []
1270        new_means = []
1271        new_idl = []
1272        new_names_obs = []
1273        for name in new_names:
1274            if name not in new_covobs:
1275                if is_merged[name]:
1276                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1277                else:
1278                    filtered_deltas = new_deltas[name]
1279                    filtered_idl_d = new_idl_d[name]
1280
1281                new_samples.append(filtered_deltas)
1282                new_idl.append(filtered_idl_d)
1283                new_means.append(new_r_values[name][i_val])
1284                new_names_obs.append(name)
1285        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1286        for name in new_covobs:
1287            final_result[i_val].names.append(name)
1288        final_result[i_val]._covobs = new_covobs
1289        final_result[i_val]._value = new_val
1290        final_result[i_val].is_merged = is_merged
1291        final_result[i_val].reweighted = reweighted
1292
1293    if multi == 0:
1294        final_result = final_result.item()
1295
1296    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs)
1336def reweight(weight, obs, **kwargs):
1337    """Reweight a list of observables.
1338
1339    Parameters
1340    ----------
1341    weight : Obs
1342        Reweighting factor. An Observable that has to be defined on a superset of the
1343        configurations in obs[i].idl for all i.
1344    obs : list
1345        list of Obs, e.g. [obs1, obs2, obs3].
1346    all_configs : bool
1347        if True, the reweighted observables are normalized by the average of
1348        the reweighting factor on all configurations in weight.idl and not
1349        on the configurations in obs[i].idl. Default False.
1350    """
1351    result = []
1352    for i in range(len(obs)):
1353        if len(obs[i].cov_names):
1354            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1355        if not set(obs[i].names).issubset(weight.names):
1356            raise Exception('Error: Ensembles do not fit')
1357        for name in obs[i].names:
1358            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1359                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1360        new_samples = []
1361        w_deltas = {}
1362        for name in sorted(obs[i].names):
1363            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1364            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1365        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1366
1367        if kwargs.get('all_configs'):
1368            new_weight = weight
1369        else:
1370            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1371
1372        result.append(tmp_obs / new_weight)
1373        result[-1].reweighted = True
1374        result[-1].is_merged = obs[i].is_merged
1375
1376    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b)
1379def correlate(obs_a, obs_b):
1380    """Correlate two observables.
1381
1382    Parameters
1383    ----------
1384    obs_a : Obs
1385        First observable
1386    obs_b : Obs
1387        Second observable
1388
1389    Notes
1390    -----
1391    Keep in mind to only correlate primary observables which have not been reweighted
1392    yet. The reweighting has to be applied after correlating the observables.
1393    Currently only works if ensembles are identical (this is not strictly necessary).
1394    """
1395
1396    if sorted(obs_a.names) != sorted(obs_b.names):
1397        raise Exception('Ensembles do not fit')
1398    if len(obs_a.cov_names) or len(obs_b.cov_names):
1399        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1400    for name in obs_a.names:
1401        if obs_a.shape[name] != obs_b.shape[name]:
1402            raise Exception('Shapes of ensemble', name, 'do not fit')
1403        if obs_a.idl[name] != obs_b.idl[name]:
1404            raise Exception('idl of ensemble', name, 'do not fit')
1405
1406    if obs_a.reweighted is True:
1407        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1408    if obs_b.reweighted is True:
1409        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1410
1411    new_samples = []
1412    new_idl = []
1413    for name in sorted(obs_a.names):
1414        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1415        new_idl.append(obs_a.idl[name])
1416
1417    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1418    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1419    o.reweighted = obs_a.reweighted or obs_b.reweighted
1420    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs)
1423def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1424    r'''Calculates the error covariance matrix of a set of observables.
1425
1426    The gamma method has to be applied first to all observables.
1427
1428    Parameters
1429    ----------
1430    obs : list or numpy.ndarray
1431        List or one dimensional array of Obs
1432    visualize : bool
1433        If True plots the corresponding normalized correlation matrix (default False).
1434    correlation : bool
1435        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1436    smooth : None or int
1437        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1438        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1439        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1440        small ones.
1441
1442    Notes
1443    -----
1444    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1445    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1446    in the absence of autocorrelation.
1447    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1448    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1449    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1450    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1451    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1452    '''
1453
1454    length = len(obs)
1455
1456    max_samples = np.max([o.N for o in obs])
1457    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1458        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1459
1460    cov = np.zeros((length, length))
1461    for i in range(length):
1462        for j in range(i, length):
1463            cov[i, j] = _covariance_element(obs[i], obs[j])
1464    cov = cov + cov.T - np.diag(np.diag(cov))
1465
1466    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1467
1468    if isinstance(smooth, int):
1469        corr = _smooth_eigenvalues(corr, smooth)
1470
1471    if visualize:
1472        plt.matshow(corr, vmin=-1, vmax=1)
1473        plt.set_cmap('RdBu')
1474        plt.colorbar()
1475        plt.draw()
1476
1477    if correlation is True:
1478        return corr
1479
1480    errors = [o.dvalue for o in obs]
1481    cov = np.diag(errors) @ corr @ np.diag(errors)
1482
1483    eigenvalues = np.linalg.eigh(cov)[0]
1484    if not np.all(eigenvalues >= 0):
1485        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1486
1487    return cov

Calculates the error covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None)
1567def import_jackknife(jacks, name, idl=None):
1568    """Imports jackknife samples and returns an Obs
1569
1570    Parameters
1571    ----------
1572    jacks : numpy.ndarray
1573        numpy array containing the mean value as zeroth entry and
1574        the N jackknife samples as first to Nth entry.
1575    name : str
1576        name of the ensemble the samples are defined on.
1577    """
1578    length = len(jacks) - 1
1579    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1580    samples = jacks[1:] @ prj
1581    mean = np.mean(samples)
1582    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1583    new_obs._value = jacks[0]
1584    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs)
1587def merge_obs(list_of_obs):
1588    """Combine all observables in list_of_obs into one new observable
1589
1590    Parameters
1591    ----------
1592    list_of_obs : list
1593        list of the Obs object to be combined
1594
1595    Notes
1596    -----
1597    It is not possible to combine obs which are based on the same replicum
1598    """
1599    replist = [item for obs in list_of_obs for item in obs.names]
1600    if (len(replist) == len(set(replist))) is False:
1601        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1602    if any([len(o.cov_names) for o in list_of_obs]):
1603        raise Exception('Not possible to merge data that contains covobs!')
1604    new_dict = {}
1605    idl_dict = {}
1606    for o in list_of_obs:
1607        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1608                        for key in set(o.deltas) | set(o.r_values)})
1609        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1610
1611    names = sorted(new_dict.keys())
1612    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1613    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1614    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1615    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None)
1618def cov_Obs(means, cov, name, grad=None):
1619    """Create an Obs based on mean(s) and a covariance matrix
1620
1621    Parameters
1622    ----------
1623    mean : list of floats or float
1624        N mean value(s) of the new Obs
1625    cov : list or array
1626        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1627    name : str
1628        identifier for the covariance matrix
1629    grad : list or array
1630        Gradient of the Covobs wrt. the means belonging to cov.
1631    """
1632
1633    def covobs_to_obs(co):
1634        """Make an Obs out of a Covobs
1635
1636        Parameters
1637        ----------
1638        co : Covobs
1639            Covobs to be embedded into the Obs
1640        """
1641        o = Obs([], [], means=[])
1642        o._value = co.value
1643        o.names.append(co.name)
1644        o._covobs[co.name] = co
1645        o._dvalue = np.sqrt(co.errsq())
1646        return o
1647
1648    ol = []
1649    if isinstance(means, (float, int)):
1650        means = [means]
1651
1652    for i in range(len(means)):
1653        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1654    if ol[0].covobs[name].N != len(means):
1655        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1656    if len(ol) == 1:
1657        return ol[0]
1658    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.