pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15
  16class Obs:
  17    """Class for a general observable.
  18
  19    Instances of Obs are the basic objects of a pyerrors error analysis.
  20    They are initialized with a list which contains arrays of samples for
  21    different ensembles/replica and another list of same length which contains
  22    the names of the ensembles/replica. Mathematical operations can be
  23    performed on instances. The result is another instance of Obs. The error of
  24    an instance can be computed with the gamma_method. Also contains additional
  25    methods for output and visualization of the error calculation.
  26
  27    Attributes
  28    ----------
  29    S_global : float
  30        Standard value for S (default 2.0)
  31    S_dict : dict
  32        Dictionary for S values. If an entry for a given ensemble
  33        exists this overwrites the standard value for that ensemble.
  34    tau_exp_global : float
  35        Standard value for tau_exp (default 0.0)
  36    tau_exp_dict : dict
  37        Dictionary for tau_exp values. If an entry for a given ensemble exists
  38        this overwrites the standard value for that ensemble.
  39    N_sigma_global : float
  40        Standard value for N_sigma (default 1.0)
  41    N_sigma_dict : dict
  42        Dictionary for N_sigma values. If an entry for a given ensemble exists
  43        this overwrites the standard value for that ensemble.
  44    """
  45    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  46                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  47                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  48                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  49                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  50
  51    S_global = 2.0
  52    S_dict = {}
  53    tau_exp_global = 0.0
  54    tau_exp_dict = {}
  55    N_sigma_global = 1.0
  56    N_sigma_dict = {}
  57    filter_eps = 1e-10
  58
  59    def __init__(self, samples, names, idl=None, **kwargs):
  60        """ Initialize Obs object.
  61
  62        Parameters
  63        ----------
  64        samples : list
  65            list of numpy arrays containing the Monte Carlo samples
  66        names : list
  67            list of strings labeling the individual samples
  68        idl : list, optional
  69            list of ranges or lists on which the samples are defined
  70        """
  71
  72        if kwargs.get("means") is None and len(samples):
  73            if len(samples) != len(names):
  74                raise Exception('Length of samples and names incompatible.')
  75            if idl is not None:
  76                if len(idl) != len(names):
  77                    raise Exception('Length of idl incompatible with samples and names.')
  78            name_length = len(names)
  79            if name_length > 1:
  80                if name_length != len(set(names)):
  81                    raise Exception('names are not unique.')
  82                if not all(isinstance(x, str) for x in names):
  83                    raise TypeError('All names have to be strings.')
  84            else:
  85                if not isinstance(names[0], str):
  86                    raise TypeError('All names have to be strings.')
  87            if min(len(x) for x in samples) <= 4:
  88                raise Exception('Samples have to have at least 5 entries.')
  89
  90        self.names = sorted(names)
  91        self.shape = {}
  92        self.r_values = {}
  93        self.deltas = {}
  94        self._covobs = {}
  95
  96        self._value = 0
  97        self.N = 0
  98        self.is_merged = {}
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise Exception("Unsorted idx for idl[%s]" % (name))
 108                    if len(dc) == 1:
 109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 110                    else:
 111                        self.idl[name] = list(idx)
 112                else:
 113                    raise Exception('incompatible type for idl[%s].' % (name))
 114        else:
 115            for name, sample in sorted(zip(names, samples)):
 116                self.idl[name] = range(1, len(sample) + 1)
 117
 118        if kwargs.get("means") is not None:
 119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 120                self.shape[name] = len(self.idl[name])
 121                self.N += self.shape[name]
 122                self.r_values[name] = mean
 123                self.deltas[name] = sample
 124        else:
 125            for name, sample in sorted(zip(names, samples)):
 126                self.shape[name] = len(self.idl[name])
 127                self.N += self.shape[name]
 128                if len(sample) != self.shape[name]:
 129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 130                self.r_values[name] = np.mean(sample)
 131                self.deltas[name] = sample - self.r_values[name]
 132                self._value += self.shape[name] * self.r_values[name]
 133            self._value /= self.N
 134
 135        self._dvalue = 0.0
 136        self.ddvalue = 0.0
 137        self.reweighted = False
 138
 139        self.tag = None
 140
 141    @property
 142    def value(self):
 143        return self._value
 144
 145    @property
 146    def dvalue(self):
 147        return self._dvalue
 148
 149    @property
 150    def e_names(self):
 151        return sorted(set([o.split('|')[0] for o in self.names]))
 152
 153    @property
 154    def cov_names(self):
 155        return sorted(set([o for o in self.covobs.keys()]))
 156
 157    @property
 158    def mc_names(self):
 159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 160
 161    @property
 162    def e_content(self):
 163        res = {}
 164        for e, e_name in enumerate(self.e_names):
 165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 166            if e_name in self.names:
 167                res[e_name].append(e_name)
 168        return res
 169
 170    @property
 171    def covobs(self):
 172        return self._covobs
 173
 174    def gamma_method(self, **kwargs):
 175        """Estimate the error and related properties of the Obs.
 176
 177        Parameters
 178        ----------
 179        S : float
 180            specifies a custom value for the parameter S (default 2.0).
 181            If set to 0 it is assumed that the data exhibits no
 182            autocorrelation. In this case the error estimates coincides
 183            with the sample standard error.
 184        tau_exp : float
 185            positive value triggers the critical slowing down analysis
 186            (default 0.0).
 187        N_sigma : float
 188            number of standard deviations from zero until the tail is
 189            attached to the autocorrelation function (default 1).
 190        fft : bool
 191            determines whether the fft algorithm is used for the computation
 192            of the autocorrelation function (default True)
 193        """
 194
 195        e_content = self.e_content
 196        self.e_dvalue = {}
 197        self.e_ddvalue = {}
 198        self.e_tauint = {}
 199        self.e_dtauint = {}
 200        self.e_windowsize = {}
 201        self.e_n_tauint = {}
 202        self.e_n_dtauint = {}
 203        e_gamma = {}
 204        self.e_rho = {}
 205        self.e_drho = {}
 206        self._dvalue = 0
 207        self.ddvalue = 0
 208
 209        self.S = {}
 210        self.tau_exp = {}
 211        self.N_sigma = {}
 212
 213        if kwargs.get('fft') is False:
 214            fft = False
 215        else:
 216            fft = True
 217
 218        def _parse_kwarg(kwarg_name):
 219            if kwarg_name in kwargs:
 220                tmp = kwargs.get(kwarg_name)
 221                if isinstance(tmp, (int, float)):
 222                    if tmp < 0:
 223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 224                    for e, e_name in enumerate(self.e_names):
 225                        getattr(self, kwarg_name)[e_name] = tmp
 226                else:
 227                    raise TypeError(kwarg_name + ' is not in proper format.')
 228            else:
 229                for e, e_name in enumerate(self.e_names):
 230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 232                    else:
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 234
 235        _parse_kwarg('S')
 236        _parse_kwarg('tau_exp')
 237        _parse_kwarg('N_sigma')
 238
 239        for e, e_name in enumerate(self.mc_names):
 240            r_length = []
 241            for r_name in e_content[e_name]:
 242                if isinstance(self.idl[r_name], range):
 243                    r_length.append(len(self.idl[r_name]))
 244                else:
 245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 246
 247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 248            w_max = max(r_length) // 2
 249            e_gamma[e_name] = np.zeros(w_max)
 250            self.e_rho[e_name] = np.zeros(w_max)
 251            self.e_drho[e_name] = np.zeros(w_max)
 252
 253            for r_name in e_content[e_name]:
 254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 255
 256            gamma_div = np.zeros(w_max)
 257            for r_name in e_content[e_name]:
 258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 259            gamma_div[gamma_div < 1] = 1.0
 260            e_gamma[e_name] /= gamma_div[:w_max]
 261
 262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 263                self.e_tauint[e_name] = 0.5
 264                self.e_dtauint[e_name] = 0.0
 265                self.e_dvalue[e_name] = 0.0
 266                self.e_ddvalue[e_name] = 0.0
 267                self.e_windowsize[e_name] = 0
 268                continue
 269
 270            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 271            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 272            # Make sure no entry of tauint is smaller than 0.5
 273            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 274            # hep-lat/0306017 eq. (42)
 275            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 276            self.e_n_dtauint[e_name][0] = 0.0
 277
 278            def _compute_drho(i):
 279                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 280                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 281
 282            _compute_drho(1)
 283            if self.tau_exp[e_name] > 0:
 284                texp = self.tau_exp[e_name]
 285                # Critical slowing down analysis
 286                if w_max // 2 <= 1:
 287                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 288                for n in range(1, w_max // 2):
 289                    _compute_drho(n + 1)
 290                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 291                        # Bias correction hep-lat/0306017 eq. (49) included
 292                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 293                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 294                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 295                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 296                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 297                        self.e_windowsize[e_name] = n
 298                        break
 299            else:
 300                if self.S[e_name] == 0.0:
 301                    self.e_tauint[e_name] = 0.5
 302                    self.e_dtauint[e_name] = 0.0
 303                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 304                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 305                    self.e_windowsize[e_name] = 0
 306                else:
 307                    # Standard automatic windowing procedure
 308                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 309                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
 310                    for n in range(1, w_max):
 311                        if n < w_max // 2 - 2:
 312                            _compute_drho(n + 1)
 313                        if g_w[n - 1] < 0 or n >= w_max - 1:
 314                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 315                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 316                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 317                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 318                            self.e_windowsize[e_name] = n
 319                            break
 320
 321            self._dvalue += self.e_dvalue[e_name] ** 2
 322            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 323
 324        for e_name in self.cov_names:
 325            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 326            self.e_ddvalue[e_name] = 0
 327            self._dvalue += self.e_dvalue[e_name]**2
 328
 329        self._dvalue = np.sqrt(self._dvalue)
 330        if self._dvalue == 0.0:
 331            self.ddvalue = 0.0
 332        else:
 333            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 334        return
 335
 336    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 337        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 338           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 339
 340        Parameters
 341        ----------
 342        deltas : list
 343            List of fluctuations
 344        idx : list
 345            List or range of configurations on which the deltas are defined.
 346        shape : int
 347            Number of configurations in idx.
 348        w_max : int
 349            Upper bound for the summation window.
 350        fft : bool
 351            determines whether the fft algorithm is used for the computation
 352            of the autocorrelation function.
 353        """
 354        gamma = np.zeros(w_max)
 355        deltas = _expand_deltas(deltas, idx, shape)
 356        new_shape = len(deltas)
 357        if fft:
 358            max_gamma = min(new_shape, w_max)
 359            # The padding for the fft has to be even
 360            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 361            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 362        else:
 363            for n in range(w_max):
 364                if new_shape - n >= 0:
 365                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 366
 367        return gamma
 368
 369    def details(self, ens_content=True):
 370        """Output detailed properties of the Obs.
 371
 372        Parameters
 373        ----------
 374        ens_content : bool
 375            print details about the ensembles and replica if true.
 376        """
 377        if self.tag is not None:
 378            print("Description:", self.tag)
 379        if not hasattr(self, 'e_dvalue'):
 380            print('Result\t %3.8e' % (self.value))
 381        else:
 382            if self.value == 0.0:
 383                percentage = np.nan
 384            else:
 385                percentage = np.abs(self._dvalue / self.value) * 100
 386            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 387            if len(self.e_names) > 1:
 388                print(' Ensemble errors:')
 389            for e_name in self.mc_names:
 390                if len(self.e_names) > 1:
 391                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 392                if self.tau_exp[e_name] > 0:
 393                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
 394                else:
 395                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
 396            for e_name in self.cov_names:
 397                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 398        if ens_content is True:
 399            if len(self.e_names) == 1:
 400                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 401            else:
 402                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 403            my_string_list = []
 404            for key, value in sorted(self.e_content.items()):
 405                if key not in self.covobs:
 406                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 407                    if len(value) == 1:
 408                        my_string += f': {self.shape[value[0]]} configurations'
 409                        if isinstance(self.idl[value[0]], range):
 410                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 411                        else:
 412                            my_string += ' (irregular range)'
 413                    else:
 414                        sublist = []
 415                        for v in value:
 416                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 417                            my_substring += f': {self.shape[v]} configurations'
 418                            if isinstance(self.idl[v], range):
 419                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 420                            else:
 421                                my_substring += ' (irregular range)'
 422                            sublist.append(my_substring)
 423
 424                        my_string += '\n' + '\n'.join(sublist)
 425                else:
 426                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 427                my_string_list.append(my_string)
 428            print('\n'.join(my_string_list))
 429
 430    def reweight(self, weight):
 431        """Reweight the obs with given rewighting factors.
 432
 433        Parameters
 434        ----------
 435        weight : Obs
 436            Reweighting factor. An Observable that has to be defined on a superset of the
 437            configurations in obs[i].idl for all i.
 438        all_configs : bool
 439            if True, the reweighted observables are normalized by the average of
 440            the reweighting factor on all configurations in weight.idl and not
 441            on the configurations in obs[i].idl. Default False.
 442        """
 443        return reweight(weight, [self])[0]
 444
 445    def is_zero_within_error(self, sigma=1):
 446        """Checks whether the observable is zero within 'sigma' standard errors.
 447
 448        Parameters
 449        ----------
 450        sigma : int
 451            Number of standard errors used for the check.
 452
 453        Works only properly when the gamma method was run.
 454        """
 455        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 456
 457    def is_zero(self, atol=1e-10):
 458        """Checks whether the observable is zero within a given tolerance.
 459
 460        Parameters
 461        ----------
 462        atol : float
 463            Absolute tolerance (for details see numpy documentation).
 464        """
 465        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 466
 467    def plot_tauint(self, save=None):
 468        """Plot integrated autocorrelation time for each ensemble.
 469
 470        Parameters
 471        ----------
 472        save : str
 473            saves the figure to a file named 'save' if.
 474        """
 475        if not hasattr(self, 'e_dvalue'):
 476            raise Exception('Run the gamma method first.')
 477
 478        for e, e_name in enumerate(self.mc_names):
 479            fig = plt.figure()
 480            plt.xlabel(r'$W$')
 481            plt.ylabel(r'$\tau_\mathrm{int}$')
 482            length = int(len(self.e_n_tauint[e_name]))
 483            if self.tau_exp[e_name] > 0:
 484                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 485                x_help = np.arange(2 * self.tau_exp[e_name])
 486                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 487                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 488                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 489                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 490                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 491                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 492                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 493            else:
 494                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 495                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 496
 497            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 498            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 499            plt.legend()
 500            plt.xlim(-0.5, xmax)
 501            ylim = plt.ylim()
 502            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 503            plt.draw()
 504            if save:
 505                fig.savefig(save + "_" + str(e))
 506
 507    def plot_rho(self, save=None):
 508        """Plot normalized autocorrelation function time for each ensemble.
 509
 510        Parameters
 511        ----------
 512        save : str
 513            saves the figure to a file named 'save' if.
 514        """
 515        if not hasattr(self, 'e_dvalue'):
 516            raise Exception('Run the gamma method first.')
 517        for e, e_name in enumerate(self.mc_names):
 518            fig = plt.figure()
 519            plt.xlabel('W')
 520            plt.ylabel('rho')
 521            length = int(len(self.e_drho[e_name]))
 522            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 523            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 524            if self.tau_exp[e_name] > 0:
 525                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 526                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 527                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 528                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 529            else:
 530                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 531                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 532            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 533            plt.xlim(-0.5, xmax)
 534            plt.draw()
 535            if save:
 536                fig.savefig(save + "_" + str(e))
 537
 538    def plot_rep_dist(self):
 539        """Plot replica distribution for each ensemble with more than one replicum."""
 540        if not hasattr(self, 'e_dvalue'):
 541            raise Exception('Run the gamma method first.')
 542        for e, e_name in enumerate(self.mc_names):
 543            if len(self.e_content[e_name]) == 1:
 544                print('No replica distribution for a single replicum (', e_name, ')')
 545                continue
 546            r_length = []
 547            sub_r_mean = 0
 548            for r, r_name in enumerate(self.e_content[e_name]):
 549                r_length.append(len(self.deltas[r_name]))
 550                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 551            e_N = np.sum(r_length)
 552            sub_r_mean /= e_N
 553            arr = np.zeros(len(self.e_content[e_name]))
 554            for r, r_name in enumerate(self.e_content[e_name]):
 555                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 556            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 557            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 558            plt.draw()
 559
 560    def plot_history(self, expand=True):
 561        """Plot derived Monte Carlo history for each ensemble
 562
 563        Parameters
 564        ----------
 565        expand : bool
 566            show expanded history for irregular Monte Carlo chains (default: True).
 567        """
 568        for e, e_name in enumerate(self.mc_names):
 569            plt.figure()
 570            r_length = []
 571            tmp = []
 572            tmp_expanded = []
 573            for r, r_name in enumerate(self.e_content[e_name]):
 574                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 575                if expand:
 576                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 577                    r_length.append(len(tmp_expanded[-1]))
 578                else:
 579                    r_length.append(len(tmp[-1]))
 580            e_N = np.sum(r_length)
 581            x = np.arange(e_N)
 582            y_test = np.concatenate(tmp, axis=0)
 583            if expand:
 584                y = np.concatenate(tmp_expanded, axis=0)
 585            else:
 586                y = y_test
 587            plt.errorbar(x, y, fmt='.', markersize=3)
 588            plt.xlim(-0.5, e_N - 0.5)
 589            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 590            plt.draw()
 591
 592    def plot_piechart(self, save=None):
 593        """Plot piechart which shows the fractional contribution of each
 594        ensemble to the error and returns a dictionary containing the fractions.
 595
 596        Parameters
 597        ----------
 598        save : str
 599            saves the figure to a file named 'save' if.
 600        """
 601        if not hasattr(self, 'e_dvalue'):
 602            raise Exception('Run the gamma method first.')
 603        if np.isclose(0.0, self._dvalue, atol=1e-15):
 604            raise Exception('Error is 0.0')
 605        labels = self.e_names
 606        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 607        fig1, ax1 = plt.subplots()
 608        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 609        ax1.axis('equal')
 610        plt.draw()
 611        if save:
 612            fig1.savefig(save)
 613
 614        return dict(zip(self.e_names, sizes))
 615
 616    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 617        """Dump the Obs to a file 'name' of chosen format.
 618
 619        Parameters
 620        ----------
 621        filename : str
 622            name of the file to be saved.
 623        datatype : str
 624            Format of the exported file. Supported formats include
 625            "json.gz" and "pickle"
 626        description : str
 627            Description for output file, only relevant for json.gz format.
 628        path : str
 629            specifies a custom path for the file (default '.')
 630        """
 631        if 'path' in kwargs:
 632            file_name = kwargs.get('path') + '/' + filename
 633        else:
 634            file_name = filename
 635
 636        if datatype == "json.gz":
 637            from .input.json import dump_to_json
 638            dump_to_json([self], file_name, description=description)
 639        elif datatype == "pickle":
 640            with open(file_name + '.p', 'wb') as fb:
 641                pickle.dump(self, fb)
 642        else:
 643            raise Exception("Unknown datatype " + str(datatype))
 644
 645    def export_jackknife(self):
 646        """Export jackknife samples from the Obs
 647
 648        Returns
 649        -------
 650        numpy.ndarray
 651            Returns a numpy array of length N + 1 where N is the number of samples
 652            for the given ensemble and replicum. The zeroth entry of the array contains
 653            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 654            derived from the Obs. The current implementation only works for observables
 655            defined on exactly one ensemble and replicum. The derived jackknife samples
 656            should agree with samples from a full jackknife analysis up to O(1/N).
 657        """
 658
 659        if len(self.names) != 1:
 660            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 661
 662        name = self.names[0]
 663        full_data = self.deltas[name] + self.r_values[name]
 664        n = full_data.size
 665        mean = self.value
 666        tmp_jacks = np.zeros(n + 1)
 667        tmp_jacks[0] = mean
 668        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 669        return tmp_jacks
 670
 671    def __float__(self):
 672        return float(self.value)
 673
 674    def __repr__(self):
 675        return 'Obs[' + str(self) + ']'
 676
 677    def __str__(self):
 678        if self._dvalue == 0.0:
 679            return str(self.value)
 680        fexp = np.floor(np.log10(self._dvalue))
 681        if fexp < 0.0:
 682            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 683        elif fexp == 0.0:
 684            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
 685        else:
 686            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 687
 688    def __hash__(self):
 689        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 690        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 691        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 692        hash_tuple += tuple([o.encode() for o in self.names])
 693        m = hashlib.md5()
 694        [m.update(o) for o in hash_tuple]
 695        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 696
 697    # Overload comparisons
 698    def __lt__(self, other):
 699        return self.value < other
 700
 701    def __le__(self, other):
 702        return self.value <= other
 703
 704    def __gt__(self, other):
 705        return self.value > other
 706
 707    def __ge__(self, other):
 708        return self.value >= other
 709
 710    def __eq__(self, other):
 711        return (self - other).is_zero()
 712
 713    def __ne__(self, other):
 714        return not (self - other).is_zero()
 715
 716    # Overload math operations
 717    def __add__(self, y):
 718        if isinstance(y, Obs):
 719            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 720        else:
 721            if isinstance(y, np.ndarray):
 722                return np.array([self + o for o in y])
 723            elif y.__class__.__name__ in ['Corr', 'CObs']:
 724                return NotImplemented
 725            else:
 726                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 727
 728    def __radd__(self, y):
 729        return self + y
 730
 731    def __mul__(self, y):
 732        if isinstance(y, Obs):
 733            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 734        else:
 735            if isinstance(y, np.ndarray):
 736                return np.array([self * o for o in y])
 737            elif isinstance(y, complex):
 738                return CObs(self * y.real, self * y.imag)
 739            elif y.__class__.__name__ in ['Corr', 'CObs']:
 740                return NotImplemented
 741            else:
 742                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 743
 744    def __rmul__(self, y):
 745        return self * y
 746
 747    def __sub__(self, y):
 748        if isinstance(y, Obs):
 749            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 750        else:
 751            if isinstance(y, np.ndarray):
 752                return np.array([self - o for o in y])
 753            elif y.__class__.__name__ in ['Corr', 'CObs']:
 754                return NotImplemented
 755            else:
 756                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 757
 758    def __rsub__(self, y):
 759        return -1 * (self - y)
 760
 761    def __pos__(self):
 762        return self
 763
 764    def __neg__(self):
 765        return -1 * self
 766
 767    def __truediv__(self, y):
 768        if isinstance(y, Obs):
 769            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 770        else:
 771            if isinstance(y, np.ndarray):
 772                return np.array([self / o for o in y])
 773            elif y.__class__.__name__ in ['Corr', 'CObs']:
 774                return NotImplemented
 775            else:
 776                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 777
 778    def __rtruediv__(self, y):
 779        if isinstance(y, Obs):
 780            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 781        else:
 782            if isinstance(y, np.ndarray):
 783                return np.array([o / self for o in y])
 784            elif y.__class__.__name__ in ['Corr', 'CObs']:
 785                return NotImplemented
 786            else:
 787                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 788
 789    def __pow__(self, y):
 790        if isinstance(y, Obs):
 791            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 792        else:
 793            return derived_observable(lambda x: x[0] ** y, [self])
 794
 795    def __rpow__(self, y):
 796        if isinstance(y, Obs):
 797            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 798        else:
 799            return derived_observable(lambda x: y ** x[0], [self])
 800
 801    def __abs__(self):
 802        return derived_observable(lambda x: anp.abs(x[0]), [self])
 803
 804    # Overload numpy functions
 805    def sqrt(self):
 806        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 807
 808    def log(self):
 809        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 810
 811    def exp(self):
 812        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 813
 814    def sin(self):
 815        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 816
 817    def cos(self):
 818        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 819
 820    def tan(self):
 821        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 822
 823    def arcsin(self):
 824        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 825
 826    def arccos(self):
 827        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 828
 829    def arctan(self):
 830        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 831
 832    def sinh(self):
 833        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 834
 835    def cosh(self):
 836        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 837
 838    def tanh(self):
 839        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 840
 841    def arcsinh(self):
 842        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 843
 844    def arccosh(self):
 845        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 846
 847    def arctanh(self):
 848        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 849
 850
 851class CObs:
 852    """Class for a complex valued observable."""
 853    __slots__ = ['_real', '_imag', 'tag']
 854
 855    def __init__(self, real, imag=0.0):
 856        self._real = real
 857        self._imag = imag
 858        self.tag = None
 859
 860    @property
 861    def real(self):
 862        return self._real
 863
 864    @property
 865    def imag(self):
 866        return self._imag
 867
 868    def gamma_method(self, **kwargs):
 869        """Executes the gamma_method for the real and the imaginary part."""
 870        if isinstance(self.real, Obs):
 871            self.real.gamma_method(**kwargs)
 872        if isinstance(self.imag, Obs):
 873            self.imag.gamma_method(**kwargs)
 874
 875    def is_zero(self):
 876        """Checks whether both real and imaginary part are zero within machine precision."""
 877        return self.real == 0.0 and self.imag == 0.0
 878
 879    def conjugate(self):
 880        return CObs(self.real, -self.imag)
 881
 882    def __add__(self, other):
 883        if isinstance(other, np.ndarray):
 884            return other + self
 885        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 886            return CObs(self.real + other.real,
 887                        self.imag + other.imag)
 888        else:
 889            return CObs(self.real + other, self.imag)
 890
 891    def __radd__(self, y):
 892        return self + y
 893
 894    def __sub__(self, other):
 895        if isinstance(other, np.ndarray):
 896            return -1 * (other - self)
 897        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 898            return CObs(self.real - other.real, self.imag - other.imag)
 899        else:
 900            return CObs(self.real - other, self.imag)
 901
 902    def __rsub__(self, other):
 903        return -1 * (self - other)
 904
 905    def __mul__(self, other):
 906        if isinstance(other, np.ndarray):
 907            return other * self
 908        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 909            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 910                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 911                                               [self.real, other.real, self.imag, other.imag],
 912                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 913                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 914                                               [self.real, other.real, self.imag, other.imag],
 915                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 916            elif getattr(other, 'imag', 0) != 0:
 917                return CObs(self.real * other.real - self.imag * other.imag,
 918                            self.imag * other.real + self.real * other.imag)
 919            else:
 920                return CObs(self.real * other.real, self.imag * other.real)
 921        else:
 922            return CObs(self.real * other, self.imag * other)
 923
 924    def __rmul__(self, other):
 925        return self * other
 926
 927    def __truediv__(self, other):
 928        if isinstance(other, np.ndarray):
 929            return 1 / (other / self)
 930        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 931            r = other.real ** 2 + other.imag ** 2
 932            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 933        else:
 934            return CObs(self.real / other, self.imag / other)
 935
 936    def __rtruediv__(self, other):
 937        r = self.real ** 2 + self.imag ** 2
 938        if hasattr(other, 'real') and hasattr(other, 'imag'):
 939            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 940        else:
 941            return CObs(self.real * other / r, -self.imag * other / r)
 942
 943    def __abs__(self):
 944        return np.sqrt(self.real**2 + self.imag**2)
 945
 946    def __pos__(self):
 947        return self
 948
 949    def __neg__(self):
 950        return -1 * self
 951
 952    def __eq__(self, other):
 953        return self.real == other.real and self.imag == other.imag
 954
 955    def __str__(self):
 956        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 957
 958    def __repr__(self):
 959        return 'CObs[' + str(self) + ']'
 960
 961
 962def _expand_deltas(deltas, idx, shape):
 963    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 964       If idx is of type range, the deltas are not changed
 965
 966    Parameters
 967    ----------
 968    deltas : list
 969        List of fluctuations
 970    idx : list
 971        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 972    shape : int
 973        Number of configs in idx.
 974    """
 975    if isinstance(idx, range):
 976        return deltas
 977    else:
 978        ret = np.zeros(idx[-1] - idx[0] + 1)
 979        for i in range(shape):
 980            ret[idx[i] - idx[0]] = deltas[i]
 981        return ret
 982
 983
 984def _merge_idx(idl):
 985    """Returns the union of all lists in idl as sorted list
 986
 987    Parameters
 988    ----------
 989    idl : list
 990        List of lists or ranges.
 991    """
 992
 993    # Use groupby to efficiently check whether all elements of idl are identical
 994    try:
 995        g = groupby(idl)
 996        if next(g, True) and not next(g, False):
 997            return idl[0]
 998    except Exception:
 999        pass
1000
1001    if np.all([type(idx) is range for idx in idl]):
1002        if len(set([idx[0] for idx in idl])) == 1:
1003            idstart = min([idx.start for idx in idl])
1004            idstop = max([idx.stop for idx in idl])
1005            idstep = min([idx.step for idx in idl])
1006            return range(idstart, idstop, idstep)
1007
1008    return sorted(set().union(*idl))
1009
1010
1011def _intersection_idx(idl):
1012    """Returns the intersection of all lists in idl as sorted list
1013
1014    Parameters
1015    ----------
1016    idl : list
1017        List of lists or ranges.
1018    """
1019
1020    def _lcm(*args):
1021        """Returns the lowest common multiple of args.
1022
1023        From python 3.9 onwards the math library contains an lcm function."""
1024        return reduce(lambda a, b: a * b // gcd(a, b), args)
1025
1026    # Use groupby to efficiently check whether all elements of idl are identical
1027    try:
1028        g = groupby(idl)
1029        if next(g, True) and not next(g, False):
1030            return idl[0]
1031    except Exception:
1032        pass
1033
1034    if np.all([type(idx) is range for idx in idl]):
1035        if len(set([idx[0] for idx in idl])) == 1:
1036            idstart = max([idx.start for idx in idl])
1037            idstop = min([idx.stop for idx in idl])
1038            idstep = _lcm(*[idx.step for idx in idl])
1039            return range(idstart, idstop, idstep)
1040
1041    return sorted(set.intersection(*[set(o) for o in idl]))
1042
1043
1044def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1045    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1046       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1047       common divisor of the step sizes is used as new step size.
1048
1049    Parameters
1050    ----------
1051    deltas : list
1052        List of fluctuations
1053    idx : list
1054        List or range of configs on which the deltas are defined.
1055        Has to be a subset of new_idx and has to be sorted in ascending order.
1056    shape : list
1057        Number of configs in idx.
1058    new_idx : list
1059        List of configs that defines the new range, has to be sorted in ascending order.
1060    """
1061
1062    if type(idx) is range and type(new_idx) is range:
1063        if idx == new_idx:
1064            return deltas
1065    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1066    for i in range(shape):
1067        ret[idx[i] - new_idx[0]] = deltas[i]
1068    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1069
1070
1071def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
1072    """Collapse deltas defined on idx to the list of configs that is defined by new_idx.
1073       If idx and new_idx are of type range, the smallest
1074       common divisor of the step sizes is used as new step size.
1075
1076    Parameters
1077    ----------
1078    deltas : list
1079        List of fluctuations
1080    idx : list
1081        List or range of configs on which the deltas are defined.
1082        Has to be a subset of new_idx and has to be sorted in ascending order.
1083    shape : list
1084        Number of configs in idx.
1085    new_idx : list
1086        List of configs that defines the new range, has to be sorted in ascending order.
1087    """
1088
1089    if type(idx) is range and type(new_idx) is range:
1090        if idx == new_idx:
1091            return deltas
1092    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1093    for i in range(shape):
1094        if idx[i] in new_idx:
1095            ret[idx[i] - new_idx[0]] = deltas[i]
1096    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1097
1098
1099def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1100    """Filter out all configurations with vanishing fluctuation such that they do not
1101       contribute to the error estimate anymore. Returns the new deltas and
1102       idx according to the filtering.
1103       A fluctuation is considered to be vanishing, if it is smaller than eps times
1104       the mean of the absolute values of all deltas in one list.
1105
1106    Parameters
1107    ----------
1108    deltas : list
1109        List of fluctuations
1110    idx : list
1111        List or ranges of configs on which the deltas are defined.
1112    eps : float
1113        Prefactor that enters the filter criterion.
1114    """
1115    new_deltas = []
1116    new_idx = []
1117    maxd = np.mean(np.fabs(deltas))
1118    for i in range(len(deltas)):
1119        if abs(deltas[i]) > eps * maxd:
1120            new_deltas.append(deltas[i])
1121            new_idx.append(idx[i])
1122    if new_idx:
1123        return np.array(new_deltas), new_idx
1124    else:
1125        return deltas, idx
1126
1127
1128def derived_observable(func, data, array_mode=False, **kwargs):
1129    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1130
1131    Parameters
1132    ----------
1133    func : object
1134        arbitrary function of the form func(data, **kwargs). For the
1135        automatic differentiation to work, all numpy functions have to have
1136        the autograd wrapper (use 'import autograd.numpy as anp').
1137    data : list
1138        list of Obs, e.g. [obs1, obs2, obs3].
1139    num_grad : bool
1140        if True, numerical derivatives are used instead of autograd
1141        (default False). To control the numerical differentiation the
1142        kwargs of numdifftools.step_generators.MaxStepGenerator
1143        can be used.
1144    man_grad : list
1145        manually supply a list or an array which contains the jacobian
1146        of func. Use cautiously, supplying the wrong derivative will
1147        not be intercepted.
1148
1149    Notes
1150    -----
1151    For simple mathematical operations it can be practical to use anonymous
1152    functions. For the ratio of two observables one can e.g. use
1153
1154    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1155    """
1156
1157    data = np.asarray(data)
1158    raveled_data = data.ravel()
1159
1160    # Workaround for matrix operations containing non Obs data
1161    if not all(isinstance(x, Obs) for x in raveled_data):
1162        for i in range(len(raveled_data)):
1163            if isinstance(raveled_data[i], (int, float)):
1164                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1165
1166    allcov = {}
1167    for o in raveled_data:
1168        for name in o.cov_names:
1169            if name in allcov:
1170                if not np.allclose(allcov[name], o.covobs[name].cov):
1171                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1172            else:
1173                allcov[name] = o.covobs[name].cov
1174
1175    n_obs = len(raveled_data)
1176    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1177    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1178    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1179
1180    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1181    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1182
1183    if data.ndim == 1:
1184        values = np.array([o.value for o in data])
1185    else:
1186        values = np.vectorize(lambda x: x.value)(data)
1187
1188    new_values = func(values, **kwargs)
1189
1190    multi = int(isinstance(new_values, np.ndarray))
1191
1192    new_r_values = {}
1193    new_idl_d = {}
1194    for name in new_sample_names:
1195        idl = []
1196        tmp_values = np.zeros(n_obs)
1197        for i, item in enumerate(raveled_data):
1198            tmp_values[i] = item.r_values.get(name, item.value)
1199            tmp_idl = item.idl.get(name)
1200            if tmp_idl is not None:
1201                idl.append(tmp_idl)
1202        if multi > 0:
1203            tmp_values = np.array(tmp_values).reshape(data.shape)
1204        new_r_values[name] = func(tmp_values, **kwargs)
1205        new_idl_d[name] = _merge_idx(idl)
1206        if not is_merged[name]:
1207            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1208
1209    if 'man_grad' in kwargs:
1210        deriv = np.asarray(kwargs.get('man_grad'))
1211        if new_values.shape + data.shape != deriv.shape:
1212            raise Exception('Manual derivative does not have correct shape.')
1213    elif kwargs.get('num_grad') is True:
1214        if multi > 0:
1215            raise Exception('Multi mode currently not supported for numerical derivative')
1216        options = {
1217            'base_step': 0.1,
1218            'step_ratio': 2.5}
1219        for key in options.keys():
1220            kwarg = kwargs.get(key)
1221            if kwarg is not None:
1222                options[key] = kwarg
1223        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1224        if tmp_df.size == 1:
1225            deriv = np.array([tmp_df.real])
1226        else:
1227            deriv = tmp_df.real
1228    else:
1229        deriv = jacobian(func)(values, **kwargs)
1230
1231    final_result = np.zeros(new_values.shape, dtype=object)
1232
1233    if array_mode is True:
1234
1235        class _Zero_grad():
1236            def __init__(self, N):
1237                self.grad = np.zeros((N, 1))
1238
1239        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1240        d_extracted = {}
1241        g_extracted = {}
1242        for name in new_sample_names:
1243            d_extracted[name] = []
1244            ens_length = len(new_idl_d[name])
1245            for i_dat, dat in enumerate(data):
1246                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1247        for name in new_cov_names:
1248            g_extracted[name] = []
1249            zero_grad = _Zero_grad(new_covobs_lengths[name])
1250            for i_dat, dat in enumerate(data):
1251                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1252
1253    for i_val, new_val in np.ndenumerate(new_values):
1254        new_deltas = {}
1255        new_grad = {}
1256        if array_mode is True:
1257            for name in new_sample_names:
1258                ens_length = d_extracted[name][0].shape[-1]
1259                new_deltas[name] = np.zeros(ens_length)
1260                for i_dat, dat in enumerate(d_extracted[name]):
1261                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1262            for name in new_cov_names:
1263                new_grad[name] = 0
1264                for i_dat, dat in enumerate(g_extracted[name]):
1265                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1266        else:
1267            for j_obs, obs in np.ndenumerate(data):
1268                for name in obs.names:
1269                    if name in obs.cov_names:
1270                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1271                    else:
1272                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1273
1274        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1275
1276        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1277            raise Exception('The same name has been used for deltas and covobs!')
1278        new_samples = []
1279        new_means = []
1280        new_idl = []
1281        new_names_obs = []
1282        for name in new_names:
1283            if name not in new_covobs:
1284                if is_merged[name]:
1285                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1286                else:
1287                    filtered_deltas = new_deltas[name]
1288                    filtered_idl_d = new_idl_d[name]
1289
1290                new_samples.append(filtered_deltas)
1291                new_idl.append(filtered_idl_d)
1292                new_means.append(new_r_values[name][i_val])
1293                new_names_obs.append(name)
1294        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1295        for name in new_covobs:
1296            final_result[i_val].names.append(name)
1297        final_result[i_val]._covobs = new_covobs
1298        final_result[i_val]._value = new_val
1299        final_result[i_val].is_merged = is_merged
1300        final_result[i_val].reweighted = reweighted
1301
1302    if multi == 0:
1303        final_result = final_result.item()
1304
1305    return final_result
1306
1307
1308def _reduce_deltas(deltas, idx_old, idx_new):
1309    """Extract deltas defined on idx_old on all configs of idx_new.
1310
1311    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1312    are ordered in an ascending order.
1313
1314    Parameters
1315    ----------
1316    deltas : list
1317        List of fluctuations
1318    idx_old : list
1319        List or range of configs on which the deltas are defined
1320    idx_new : list
1321        List of configs for which we want to extract the deltas.
1322        Has to be a subset of idx_old.
1323    """
1324    if not len(deltas) == len(idx_old):
1325        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1326    if type(idx_old) is range and type(idx_new) is range:
1327        if idx_old == idx_new:
1328            return deltas
1329    shape = len(idx_new)
1330    ret = np.zeros(shape)
1331    oldpos = 0
1332    for i in range(shape):
1333        pos = -1
1334        for j in range(oldpos, len(idx_old)):
1335            if idx_old[j] == idx_new[i]:
1336                pos = j
1337                break
1338        if pos < 0:
1339            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
1340        ret[i] = deltas[pos]
1341        oldpos = pos
1342    return np.array(ret)
1343
1344
1345def reweight(weight, obs, **kwargs):
1346    """Reweight a list of observables.
1347
1348    Parameters
1349    ----------
1350    weight : Obs
1351        Reweighting factor. An Observable that has to be defined on a superset of the
1352        configurations in obs[i].idl for all i.
1353    obs : list
1354        list of Obs, e.g. [obs1, obs2, obs3].
1355    all_configs : bool
1356        if True, the reweighted observables are normalized by the average of
1357        the reweighting factor on all configurations in weight.idl and not
1358        on the configurations in obs[i].idl. Default False.
1359    """
1360    result = []
1361    for i in range(len(obs)):
1362        if len(obs[i].cov_names):
1363            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1364        if not set(obs[i].names).issubset(weight.names):
1365            raise Exception('Error: Ensembles do not fit')
1366        for name in obs[i].names:
1367            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1368                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1369        new_samples = []
1370        w_deltas = {}
1371        for name in sorted(obs[i].names):
1372            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1373            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1374        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1375
1376        if kwargs.get('all_configs'):
1377            new_weight = weight
1378        else:
1379            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1380
1381        result.append(tmp_obs / new_weight)
1382        result[-1].reweighted = True
1383        result[-1].is_merged = obs[i].is_merged
1384
1385    return result
1386
1387
1388def correlate(obs_a, obs_b):
1389    """Correlate two observables.
1390
1391    Parameters
1392    ----------
1393    obs_a : Obs
1394        First observable
1395    obs_b : Obs
1396        Second observable
1397
1398    Notes
1399    -----
1400    Keep in mind to only correlate primary observables which have not been reweighted
1401    yet. The reweighting has to be applied after correlating the observables.
1402    Currently only works if ensembles are identical (this is not strictly necessary).
1403    """
1404
1405    if sorted(obs_a.names) != sorted(obs_b.names):
1406        raise Exception('Ensembles do not fit')
1407    if len(obs_a.cov_names) or len(obs_b.cov_names):
1408        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1409    for name in obs_a.names:
1410        if obs_a.shape[name] != obs_b.shape[name]:
1411            raise Exception('Shapes of ensemble', name, 'do not fit')
1412        if obs_a.idl[name] != obs_b.idl[name]:
1413            raise Exception('idl of ensemble', name, 'do not fit')
1414
1415    if obs_a.reweighted is True:
1416        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1417    if obs_b.reweighted is True:
1418        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1419
1420    new_samples = []
1421    new_idl = []
1422    for name in sorted(obs_a.names):
1423        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1424        new_idl.append(obs_a.idl[name])
1425
1426    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1427    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1428    o.reweighted = obs_a.reweighted or obs_b.reweighted
1429    return o
1430
1431
1432def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1433    r'''Calculates the error covariance matrix of a set of observables.
1434
1435    The gamma method has to be applied first to all observables.
1436
1437    Parameters
1438    ----------
1439    obs : list or numpy.ndarray
1440        List or one dimensional array of Obs
1441    visualize : bool
1442        If True plots the corresponding normalized correlation matrix (default False).
1443    correlation : bool
1444        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1445    smooth : None or int
1446        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1447        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1448        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1449        small ones.
1450
1451    Notes
1452    -----
1453    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1454    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1455    in the absence of autocorrelation.
1456    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1457    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1458    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1459    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1460    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1461    '''
1462
1463    length = len(obs)
1464
1465    max_samples = np.max([o.N for o in obs])
1466    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1467        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1468
1469    cov = np.zeros((length, length))
1470    for i in range(length):
1471        for j in range(i, length):
1472            cov[i, j] = _covariance_element(obs[i], obs[j])
1473    cov = cov + cov.T - np.diag(np.diag(cov))
1474
1475    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1476
1477    if isinstance(smooth, int):
1478        corr = _smooth_eigenvalues(corr, smooth)
1479
1480    if visualize:
1481        plt.matshow(corr, vmin=-1, vmax=1)
1482        plt.set_cmap('RdBu')
1483        plt.colorbar()
1484        plt.draw()
1485
1486    if correlation is True:
1487        return corr
1488
1489    errors = [o.dvalue for o in obs]
1490    cov = np.diag(errors) @ corr @ np.diag(errors)
1491
1492    eigenvalues = np.linalg.eigh(cov)[0]
1493    if not np.all(eigenvalues >= 0):
1494        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1495
1496    return cov
1497
1498
1499def _smooth_eigenvalues(corr, E):
1500    """Eigenvalue smoothing as described in hep-lat/9412087
1501
1502    corr : np.ndarray
1503        correlation matrix
1504    E : integer
1505        Number of eigenvalues to be left substantially unchanged
1506    """
1507    if not (2 < E < corr.shape[0] - 1):
1508        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1509    vals, vec = np.linalg.eigh(corr)
1510    lambda_min = np.mean(vals[:-E])
1511    vals[vals < lambda_min] = lambda_min
1512    vals /= np.mean(vals)
1513    return vec @ np.diag(vals) @ vec.T
1514
1515
1516def _covariance_element(obs1, obs2):
1517    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1518
1519    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1520        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
1521        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
1522        return np.sum(deltas1 * deltas2)
1523
1524    if set(obs1.names).isdisjoint(set(obs2.names)):
1525        return 0.0
1526
1527    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1528        raise Exception('The gamma method has to be applied to both Obs first.')
1529
1530    dvalue = 0.0
1531
1532    for e_name in obs1.mc_names:
1533
1534        if e_name not in obs2.mc_names:
1535            continue
1536
1537        idl_d = {}
1538        for r_name in obs1.e_content[e_name]:
1539            if r_name not in obs2.e_content[e_name]:
1540                continue
1541            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1542
1543        gamma = 0.0
1544
1545        for r_name in obs1.e_content[e_name]:
1546            if r_name not in obs2.e_content[e_name]:
1547                continue
1548            if len(idl_d[r_name]) == 0:
1549                continue
1550            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1551
1552        if gamma == 0.0:
1553            continue
1554
1555        gamma_div = 0.0
1556        for r_name in obs1.e_content[e_name]:
1557            if r_name not in obs2.e_content[e_name]:
1558                continue
1559            if len(idl_d[r_name]) == 0:
1560                continue
1561            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1562        gamma /= gamma_div
1563
1564        dvalue += gamma
1565
1566    for e_name in obs1.cov_names:
1567
1568        if e_name not in obs2.cov_names:
1569            continue
1570
1571        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1572
1573    return dvalue
1574
1575
1576def import_jackknife(jacks, name, idl=None):
1577    """Imports jackknife samples and returns an Obs
1578
1579    Parameters
1580    ----------
1581    jacks : numpy.ndarray
1582        numpy array containing the mean value as zeroth entry and
1583        the N jackknife samples as first to Nth entry.
1584    name : str
1585        name of the ensemble the samples are defined on.
1586    """
1587    length = len(jacks) - 1
1588    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1589    samples = jacks[1:] @ prj
1590    mean = np.mean(samples)
1591    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1592    new_obs._value = jacks[0]
1593    return new_obs
1594
1595
1596def merge_obs(list_of_obs):
1597    """Combine all observables in list_of_obs into one new observable
1598
1599    Parameters
1600    ----------
1601    list_of_obs : list
1602        list of the Obs object to be combined
1603
1604    Notes
1605    -----
1606    It is not possible to combine obs which are based on the same replicum
1607    """
1608    replist = [item for obs in list_of_obs for item in obs.names]
1609    if (len(replist) == len(set(replist))) is False:
1610        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1611    if any([len(o.cov_names) for o in list_of_obs]):
1612        raise Exception('Not possible to merge data that contains covobs!')
1613    new_dict = {}
1614    idl_dict = {}
1615    for o in list_of_obs:
1616        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1617                        for key in set(o.deltas) | set(o.r_values)})
1618        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1619
1620    names = sorted(new_dict.keys())
1621    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1622    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1623    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1624    return o
1625
1626
1627def cov_Obs(means, cov, name, grad=None):
1628    """Create an Obs based on mean(s) and a covariance matrix
1629
1630    Parameters
1631    ----------
1632    mean : list of floats or float
1633        N mean value(s) of the new Obs
1634    cov : list or array
1635        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1636    name : str
1637        identifier for the covariance matrix
1638    grad : list or array
1639        Gradient of the Covobs wrt. the means belonging to cov.
1640    """
1641
1642    def covobs_to_obs(co):
1643        """Make an Obs out of a Covobs
1644
1645        Parameters
1646        ----------
1647        co : Covobs
1648            Covobs to be embedded into the Obs
1649        """
1650        o = Obs([], [], means=[])
1651        o._value = co.value
1652        o.names.append(co.name)
1653        o._covobs[co.name] = co
1654        o._dvalue = np.sqrt(co.errsq())
1655        return o
1656
1657    ol = []
1658    if isinstance(means, (float, int)):
1659        means = [means]
1660
1661    for i in range(len(means)):
1662        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1663    if ol[0].covobs[name].N != len(means):
1664        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1665    if len(ol) == 1:
1666        return ol[0]
1667    return ol
class Obs:
 17class Obs:
 18    """Class for a general observable.
 19
 20    Instances of Obs are the basic objects of a pyerrors error analysis.
 21    They are initialized with a list which contains arrays of samples for
 22    different ensembles/replica and another list of same length which contains
 23    the names of the ensembles/replica. Mathematical operations can be
 24    performed on instances. The result is another instance of Obs. The error of
 25    an instance can be computed with the gamma_method. Also contains additional
 26    methods for output and visualization of the error calculation.
 27
 28    Attributes
 29    ----------
 30    S_global : float
 31        Standard value for S (default 2.0)
 32    S_dict : dict
 33        Dictionary for S values. If an entry for a given ensemble
 34        exists this overwrites the standard value for that ensemble.
 35    tau_exp_global : float
 36        Standard value for tau_exp (default 0.0)
 37    tau_exp_dict : dict
 38        Dictionary for tau_exp values. If an entry for a given ensemble exists
 39        this overwrites the standard value for that ensemble.
 40    N_sigma_global : float
 41        Standard value for N_sigma (default 1.0)
 42    N_sigma_dict : dict
 43        Dictionary for N_sigma values. If an entry for a given ensemble exists
 44        this overwrites the standard value for that ensemble.
 45    """
 46    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 47                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 48                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 49                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 50                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 51
 52    S_global = 2.0
 53    S_dict = {}
 54    tau_exp_global = 0.0
 55    tau_exp_dict = {}
 56    N_sigma_global = 1.0
 57    N_sigma_dict = {}
 58    filter_eps = 1e-10
 59
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise Exception('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise Exception('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise Exception('names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise Exception('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.is_merged = {}
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise Exception("Unsorted idx for idl[%s]" % (name))
109                    if len(dc) == 1:
110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
111                    else:
112                        self.idl[name] = list(idx)
113                else:
114                    raise Exception('incompatible type for idl[%s].' % (name))
115        else:
116            for name, sample in sorted(zip(names, samples)):
117                self.idl[name] = range(1, len(sample) + 1)
118
119        if kwargs.get("means") is not None:
120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
121                self.shape[name] = len(self.idl[name])
122                self.N += self.shape[name]
123                self.r_values[name] = mean
124                self.deltas[name] = sample
125        else:
126            for name, sample in sorted(zip(names, samples)):
127                self.shape[name] = len(self.idl[name])
128                self.N += self.shape[name]
129                if len(sample) != self.shape[name]:
130                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
131                self.r_values[name] = np.mean(sample)
132                self.deltas[name] = sample - self.r_values[name]
133                self._value += self.shape[name] * self.r_values[name]
134            self._value /= self.N
135
136        self._dvalue = 0.0
137        self.ddvalue = 0.0
138        self.reweighted = False
139
140        self.tag = None
141
142    @property
143    def value(self):
144        return self._value
145
146    @property
147    def dvalue(self):
148        return self._dvalue
149
150    @property
151    def e_names(self):
152        return sorted(set([o.split('|')[0] for o in self.names]))
153
154    @property
155    def cov_names(self):
156        return sorted(set([o for o in self.covobs.keys()]))
157
158    @property
159    def mc_names(self):
160        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
161
162    @property
163    def e_content(self):
164        res = {}
165        for e, e_name in enumerate(self.e_names):
166            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
167            if e_name in self.names:
168                res[e_name].append(e_name)
169        return res
170
171    @property
172    def covobs(self):
173        return self._covobs
174
175    def gamma_method(self, **kwargs):
176        """Estimate the error and related properties of the Obs.
177
178        Parameters
179        ----------
180        S : float
181            specifies a custom value for the parameter S (default 2.0).
182            If set to 0 it is assumed that the data exhibits no
183            autocorrelation. In this case the error estimates coincides
184            with the sample standard error.
185        tau_exp : float
186            positive value triggers the critical slowing down analysis
187            (default 0.0).
188        N_sigma : float
189            number of standard deviations from zero until the tail is
190            attached to the autocorrelation function (default 1).
191        fft : bool
192            determines whether the fft algorithm is used for the computation
193            of the autocorrelation function (default True)
194        """
195
196        e_content = self.e_content
197        self.e_dvalue = {}
198        self.e_ddvalue = {}
199        self.e_tauint = {}
200        self.e_dtauint = {}
201        self.e_windowsize = {}
202        self.e_n_tauint = {}
203        self.e_n_dtauint = {}
204        e_gamma = {}
205        self.e_rho = {}
206        self.e_drho = {}
207        self._dvalue = 0
208        self.ddvalue = 0
209
210        self.S = {}
211        self.tau_exp = {}
212        self.N_sigma = {}
213
214        if kwargs.get('fft') is False:
215            fft = False
216        else:
217            fft = True
218
219        def _parse_kwarg(kwarg_name):
220            if kwarg_name in kwargs:
221                tmp = kwargs.get(kwarg_name)
222                if isinstance(tmp, (int, float)):
223                    if tmp < 0:
224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
225                    for e, e_name in enumerate(self.e_names):
226                        getattr(self, kwarg_name)[e_name] = tmp
227                else:
228                    raise TypeError(kwarg_name + ' is not in proper format.')
229            else:
230                for e, e_name in enumerate(self.e_names):
231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
233                    else:
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
235
236        _parse_kwarg('S')
237        _parse_kwarg('tau_exp')
238        _parse_kwarg('N_sigma')
239
240        for e, e_name in enumerate(self.mc_names):
241            r_length = []
242            for r_name in e_content[e_name]:
243                if isinstance(self.idl[r_name], range):
244                    r_length.append(len(self.idl[r_name]))
245                else:
246                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
247
248            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
249            w_max = max(r_length) // 2
250            e_gamma[e_name] = np.zeros(w_max)
251            self.e_rho[e_name] = np.zeros(w_max)
252            self.e_drho[e_name] = np.zeros(w_max)
253
254            for r_name in e_content[e_name]:
255                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
256
257            gamma_div = np.zeros(w_max)
258            for r_name in e_content[e_name]:
259                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
260            gamma_div[gamma_div < 1] = 1.0
261            e_gamma[e_name] /= gamma_div[:w_max]
262
263            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
264                self.e_tauint[e_name] = 0.5
265                self.e_dtauint[e_name] = 0.0
266                self.e_dvalue[e_name] = 0.0
267                self.e_ddvalue[e_name] = 0.0
268                self.e_windowsize[e_name] = 0
269                continue
270
271            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
272            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
273            # Make sure no entry of tauint is smaller than 0.5
274            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
275            # hep-lat/0306017 eq. (42)
276            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
277            self.e_n_dtauint[e_name][0] = 0.0
278
279            def _compute_drho(i):
280                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
281                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
282
283            _compute_drho(1)
284            if self.tau_exp[e_name] > 0:
285                texp = self.tau_exp[e_name]
286                # Critical slowing down analysis
287                if w_max // 2 <= 1:
288                    raise Exception("Need at least 8 samples for tau_exp error analysis")
289                for n in range(1, w_max // 2):
290                    _compute_drho(n + 1)
291                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
292                        # Bias correction hep-lat/0306017 eq. (49) included
293                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
294                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
295                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
296                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
297                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
298                        self.e_windowsize[e_name] = n
299                        break
300            else:
301                if self.S[e_name] == 0.0:
302                    self.e_tauint[e_name] = 0.5
303                    self.e_dtauint[e_name] = 0.0
304                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
305                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
306                    self.e_windowsize[e_name] = 0
307                else:
308                    # Standard automatic windowing procedure
309                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
310                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
311                    for n in range(1, w_max):
312                        if n < w_max // 2 - 2:
313                            _compute_drho(n + 1)
314                        if g_w[n - 1] < 0 or n >= w_max - 1:
315                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
316                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
317                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
318                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
319                            self.e_windowsize[e_name] = n
320                            break
321
322            self._dvalue += self.e_dvalue[e_name] ** 2
323            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
324
325        for e_name in self.cov_names:
326            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
327            self.e_ddvalue[e_name] = 0
328            self._dvalue += self.e_dvalue[e_name]**2
329
330        self._dvalue = np.sqrt(self._dvalue)
331        if self._dvalue == 0.0:
332            self.ddvalue = 0.0
333        else:
334            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
335        return
336
337    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
338        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
339           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
340
341        Parameters
342        ----------
343        deltas : list
344            List of fluctuations
345        idx : list
346            List or range of configurations on which the deltas are defined.
347        shape : int
348            Number of configurations in idx.
349        w_max : int
350            Upper bound for the summation window.
351        fft : bool
352            determines whether the fft algorithm is used for the computation
353            of the autocorrelation function.
354        """
355        gamma = np.zeros(w_max)
356        deltas = _expand_deltas(deltas, idx, shape)
357        new_shape = len(deltas)
358        if fft:
359            max_gamma = min(new_shape, w_max)
360            # The padding for the fft has to be even
361            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
362            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
363        else:
364            for n in range(w_max):
365                if new_shape - n >= 0:
366                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
367
368        return gamma
369
370    def details(self, ens_content=True):
371        """Output detailed properties of the Obs.
372
373        Parameters
374        ----------
375        ens_content : bool
376            print details about the ensembles and replica if true.
377        """
378        if self.tag is not None:
379            print("Description:", self.tag)
380        if not hasattr(self, 'e_dvalue'):
381            print('Result\t %3.8e' % (self.value))
382        else:
383            if self.value == 0.0:
384                percentage = np.nan
385            else:
386                percentage = np.abs(self._dvalue / self.value) * 100
387            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
388            if len(self.e_names) > 1:
389                print(' Ensemble errors:')
390            for e_name in self.mc_names:
391                if len(self.e_names) > 1:
392                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
393                if self.tau_exp[e_name] > 0:
394                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
395                else:
396                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
397            for e_name in self.cov_names:
398                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
399        if ens_content is True:
400            if len(self.e_names) == 1:
401                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
402            else:
403                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
404            my_string_list = []
405            for key, value in sorted(self.e_content.items()):
406                if key not in self.covobs:
407                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
408                    if len(value) == 1:
409                        my_string += f': {self.shape[value[0]]} configurations'
410                        if isinstance(self.idl[value[0]], range):
411                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
412                        else:
413                            my_string += ' (irregular range)'
414                    else:
415                        sublist = []
416                        for v in value:
417                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
418                            my_substring += f': {self.shape[v]} configurations'
419                            if isinstance(self.idl[v], range):
420                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
421                            else:
422                                my_substring += ' (irregular range)'
423                            sublist.append(my_substring)
424
425                        my_string += '\n' + '\n'.join(sublist)
426                else:
427                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
428                my_string_list.append(my_string)
429            print('\n'.join(my_string_list))
430
431    def reweight(self, weight):
432        """Reweight the obs with given rewighting factors.
433
434        Parameters
435        ----------
436        weight : Obs
437            Reweighting factor. An Observable that has to be defined on a superset of the
438            configurations in obs[i].idl for all i.
439        all_configs : bool
440            if True, the reweighted observables are normalized by the average of
441            the reweighting factor on all configurations in weight.idl and not
442            on the configurations in obs[i].idl. Default False.
443        """
444        return reweight(weight, [self])[0]
445
446    def is_zero_within_error(self, sigma=1):
447        """Checks whether the observable is zero within 'sigma' standard errors.
448
449        Parameters
450        ----------
451        sigma : int
452            Number of standard errors used for the check.
453
454        Works only properly when the gamma method was run.
455        """
456        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
457
458    def is_zero(self, atol=1e-10):
459        """Checks whether the observable is zero within a given tolerance.
460
461        Parameters
462        ----------
463        atol : float
464            Absolute tolerance (for details see numpy documentation).
465        """
466        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
467
468    def plot_tauint(self, save=None):
469        """Plot integrated autocorrelation time for each ensemble.
470
471        Parameters
472        ----------
473        save : str
474            saves the figure to a file named 'save' if.
475        """
476        if not hasattr(self, 'e_dvalue'):
477            raise Exception('Run the gamma method first.')
478
479        for e, e_name in enumerate(self.mc_names):
480            fig = plt.figure()
481            plt.xlabel(r'$W$')
482            plt.ylabel(r'$\tau_\mathrm{int}$')
483            length = int(len(self.e_n_tauint[e_name]))
484            if self.tau_exp[e_name] > 0:
485                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
486                x_help = np.arange(2 * self.tau_exp[e_name])
487                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
488                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
489                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
490                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
491                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
492                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
493                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
494            else:
495                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
496                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
497
498            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
499            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
500            plt.legend()
501            plt.xlim(-0.5, xmax)
502            ylim = plt.ylim()
503            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
504            plt.draw()
505            if save:
506                fig.savefig(save + "_" + str(e))
507
508    def plot_rho(self, save=None):
509        """Plot normalized autocorrelation function time for each ensemble.
510
511        Parameters
512        ----------
513        save : str
514            saves the figure to a file named 'save' if.
515        """
516        if not hasattr(self, 'e_dvalue'):
517            raise Exception('Run the gamma method first.')
518        for e, e_name in enumerate(self.mc_names):
519            fig = plt.figure()
520            plt.xlabel('W')
521            plt.ylabel('rho')
522            length = int(len(self.e_drho[e_name]))
523            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
524            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
525            if self.tau_exp[e_name] > 0:
526                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
527                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
528                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
529                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
530            else:
531                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
532                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
533            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
534            plt.xlim(-0.5, xmax)
535            plt.draw()
536            if save:
537                fig.savefig(save + "_" + str(e))
538
539    def plot_rep_dist(self):
540        """Plot replica distribution for each ensemble with more than one replicum."""
541        if not hasattr(self, 'e_dvalue'):
542            raise Exception('Run the gamma method first.')
543        for e, e_name in enumerate(self.mc_names):
544            if len(self.e_content[e_name]) == 1:
545                print('No replica distribution for a single replicum (', e_name, ')')
546                continue
547            r_length = []
548            sub_r_mean = 0
549            for r, r_name in enumerate(self.e_content[e_name]):
550                r_length.append(len(self.deltas[r_name]))
551                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
552            e_N = np.sum(r_length)
553            sub_r_mean /= e_N
554            arr = np.zeros(len(self.e_content[e_name]))
555            for r, r_name in enumerate(self.e_content[e_name]):
556                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
557            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
558            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
559            plt.draw()
560
561    def plot_history(self, expand=True):
562        """Plot derived Monte Carlo history for each ensemble
563
564        Parameters
565        ----------
566        expand : bool
567            show expanded history for irregular Monte Carlo chains (default: True).
568        """
569        for e, e_name in enumerate(self.mc_names):
570            plt.figure()
571            r_length = []
572            tmp = []
573            tmp_expanded = []
574            for r, r_name in enumerate(self.e_content[e_name]):
575                tmp.append(self.deltas[r_name] + self.r_values[r_name])
576                if expand:
577                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
578                    r_length.append(len(tmp_expanded[-1]))
579                else:
580                    r_length.append(len(tmp[-1]))
581            e_N = np.sum(r_length)
582            x = np.arange(e_N)
583            y_test = np.concatenate(tmp, axis=0)
584            if expand:
585                y = np.concatenate(tmp_expanded, axis=0)
586            else:
587                y = y_test
588            plt.errorbar(x, y, fmt='.', markersize=3)
589            plt.xlim(-0.5, e_N - 0.5)
590            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
591            plt.draw()
592
593    def plot_piechart(self, save=None):
594        """Plot piechart which shows the fractional contribution of each
595        ensemble to the error and returns a dictionary containing the fractions.
596
597        Parameters
598        ----------
599        save : str
600            saves the figure to a file named 'save' if.
601        """
602        if not hasattr(self, 'e_dvalue'):
603            raise Exception('Run the gamma method first.')
604        if np.isclose(0.0, self._dvalue, atol=1e-15):
605            raise Exception('Error is 0.0')
606        labels = self.e_names
607        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
608        fig1, ax1 = plt.subplots()
609        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
610        ax1.axis('equal')
611        plt.draw()
612        if save:
613            fig1.savefig(save)
614
615        return dict(zip(self.e_names, sizes))
616
617    def dump(self, filename, datatype="json.gz", description="", **kwargs):
618        """Dump the Obs to a file 'name' of chosen format.
619
620        Parameters
621        ----------
622        filename : str
623            name of the file to be saved.
624        datatype : str
625            Format of the exported file. Supported formats include
626            "json.gz" and "pickle"
627        description : str
628            Description for output file, only relevant for json.gz format.
629        path : str
630            specifies a custom path for the file (default '.')
631        """
632        if 'path' in kwargs:
633            file_name = kwargs.get('path') + '/' + filename
634        else:
635            file_name = filename
636
637        if datatype == "json.gz":
638            from .input.json import dump_to_json
639            dump_to_json([self], file_name, description=description)
640        elif datatype == "pickle":
641            with open(file_name + '.p', 'wb') as fb:
642                pickle.dump(self, fb)
643        else:
644            raise Exception("Unknown datatype " + str(datatype))
645
646    def export_jackknife(self):
647        """Export jackknife samples from the Obs
648
649        Returns
650        -------
651        numpy.ndarray
652            Returns a numpy array of length N + 1 where N is the number of samples
653            for the given ensemble and replicum. The zeroth entry of the array contains
654            the mean value of the Obs, entries 1 to N contain the N jackknife samples
655            derived from the Obs. The current implementation only works for observables
656            defined on exactly one ensemble and replicum. The derived jackknife samples
657            should agree with samples from a full jackknife analysis up to O(1/N).
658        """
659
660        if len(self.names) != 1:
661            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
662
663        name = self.names[0]
664        full_data = self.deltas[name] + self.r_values[name]
665        n = full_data.size
666        mean = self.value
667        tmp_jacks = np.zeros(n + 1)
668        tmp_jacks[0] = mean
669        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
670        return tmp_jacks
671
672    def __float__(self):
673        return float(self.value)
674
675    def __repr__(self):
676        return 'Obs[' + str(self) + ']'
677
678    def __str__(self):
679        if self._dvalue == 0.0:
680            return str(self.value)
681        fexp = np.floor(np.log10(self._dvalue))
682        if fexp < 0.0:
683            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
684        elif fexp == 0.0:
685            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
686        else:
687            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
688
689    def __hash__(self):
690        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
691        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
692        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
693        hash_tuple += tuple([o.encode() for o in self.names])
694        m = hashlib.md5()
695        [m.update(o) for o in hash_tuple]
696        return int(m.hexdigest(), 16) & 0xFFFFFFFF
697
698    # Overload comparisons
699    def __lt__(self, other):
700        return self.value < other
701
702    def __le__(self, other):
703        return self.value <= other
704
705    def __gt__(self, other):
706        return self.value > other
707
708    def __ge__(self, other):
709        return self.value >= other
710
711    def __eq__(self, other):
712        return (self - other).is_zero()
713
714    def __ne__(self, other):
715        return not (self - other).is_zero()
716
717    # Overload math operations
718    def __add__(self, y):
719        if isinstance(y, Obs):
720            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
721        else:
722            if isinstance(y, np.ndarray):
723                return np.array([self + o for o in y])
724            elif y.__class__.__name__ in ['Corr', 'CObs']:
725                return NotImplemented
726            else:
727                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
728
729    def __radd__(self, y):
730        return self + y
731
732    def __mul__(self, y):
733        if isinstance(y, Obs):
734            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
735        else:
736            if isinstance(y, np.ndarray):
737                return np.array([self * o for o in y])
738            elif isinstance(y, complex):
739                return CObs(self * y.real, self * y.imag)
740            elif y.__class__.__name__ in ['Corr', 'CObs']:
741                return NotImplemented
742            else:
743                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
744
745    def __rmul__(self, y):
746        return self * y
747
748    def __sub__(self, y):
749        if isinstance(y, Obs):
750            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
751        else:
752            if isinstance(y, np.ndarray):
753                return np.array([self - o for o in y])
754            elif y.__class__.__name__ in ['Corr', 'CObs']:
755                return NotImplemented
756            else:
757                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
758
759    def __rsub__(self, y):
760        return -1 * (self - y)
761
762    def __pos__(self):
763        return self
764
765    def __neg__(self):
766        return -1 * self
767
768    def __truediv__(self, y):
769        if isinstance(y, Obs):
770            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
771        else:
772            if isinstance(y, np.ndarray):
773                return np.array([self / o for o in y])
774            elif y.__class__.__name__ in ['Corr', 'CObs']:
775                return NotImplemented
776            else:
777                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
778
779    def __rtruediv__(self, y):
780        if isinstance(y, Obs):
781            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
782        else:
783            if isinstance(y, np.ndarray):
784                return np.array([o / self for o in y])
785            elif y.__class__.__name__ in ['Corr', 'CObs']:
786                return NotImplemented
787            else:
788                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
789
790    def __pow__(self, y):
791        if isinstance(y, Obs):
792            return derived_observable(lambda x: x[0] ** x[1], [self, y])
793        else:
794            return derived_observable(lambda x: x[0] ** y, [self])
795
796    def __rpow__(self, y):
797        if isinstance(y, Obs):
798            return derived_observable(lambda x: x[0] ** x[1], [y, self])
799        else:
800            return derived_observable(lambda x: y ** x[0], [self])
801
802    def __abs__(self):
803        return derived_observable(lambda x: anp.abs(x[0]), [self])
804
805    # Overload numpy functions
806    def sqrt(self):
807        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
808
809    def log(self):
810        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
811
812    def exp(self):
813        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
814
815    def sin(self):
816        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
817
818    def cos(self):
819        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
820
821    def tan(self):
822        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
823
824    def arcsin(self):
825        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
826
827    def arccos(self):
828        return derived_observable(lambda x: anp.arccos(x[0]), [self])
829
830    def arctan(self):
831        return derived_observable(lambda x: anp.arctan(x[0]), [self])
832
833    def sinh(self):
834        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
835
836    def cosh(self):
837        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
838
839    def tanh(self):
840        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
841
842    def arcsinh(self):
843        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
844
845    def arccosh(self):
846        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
847
848    def arctanh(self):
849        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 60    def __init__(self, samples, names, idl=None, **kwargs):
 61        """ Initialize Obs object.
 62
 63        Parameters
 64        ----------
 65        samples : list
 66            list of numpy arrays containing the Monte Carlo samples
 67        names : list
 68            list of strings labeling the individual samples
 69        idl : list, optional
 70            list of ranges or lists on which the samples are defined
 71        """
 72
 73        if kwargs.get("means") is None and len(samples):
 74            if len(samples) != len(names):
 75                raise Exception('Length of samples and names incompatible.')
 76            if idl is not None:
 77                if len(idl) != len(names):
 78                    raise Exception('Length of idl incompatible with samples and names.')
 79            name_length = len(names)
 80            if name_length > 1:
 81                if name_length != len(set(names)):
 82                    raise Exception('names are not unique.')
 83                if not all(isinstance(x, str) for x in names):
 84                    raise TypeError('All names have to be strings.')
 85            else:
 86                if not isinstance(names[0], str):
 87                    raise TypeError('All names have to be strings.')
 88            if min(len(x) for x in samples) <= 4:
 89                raise Exception('Samples have to have at least 5 entries.')
 90
 91        self.names = sorted(names)
 92        self.shape = {}
 93        self.r_values = {}
 94        self.deltas = {}
 95        self._covobs = {}
 96
 97        self._value = 0
 98        self.N = 0
 99        self.is_merged = {}
100        self.idl = {}
101        if idl is not None:
102            for name, idx in sorted(zip(names, idl)):
103                if isinstance(idx, range):
104                    self.idl[name] = idx
105                elif isinstance(idx, (list, np.ndarray)):
106                    dc = np.unique(np.diff(idx))
107                    if np.any(dc < 0):
108                        raise Exception("Unsorted idx for idl[%s]" % (name))
109                    if len(dc) == 1:
110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
111                    else:
112                        self.idl[name] = list(idx)
113                else:
114                    raise Exception('incompatible type for idl[%s].' % (name))
115        else:
116            for name, sample in sorted(zip(names, samples)):
117                self.idl[name] = range(1, len(sample) + 1)
118
119        if kwargs.get("means") is not None:
120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
121                self.shape[name] = len(self.idl[name])
122                self.N += self.shape[name]
123                self.r_values[name] = mean
124                self.deltas[name] = sample
125        else:
126            for name, sample in sorted(zip(names, samples)):
127                self.shape[name] = len(self.idl[name])
128                self.N += self.shape[name]
129                if len(sample) != self.shape[name]:
130                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
131                self.r_values[name] = np.mean(sample)
132                self.deltas[name] = sample - self.r_values[name]
133                self._value += self.shape[name] * self.r_values[name]
134            self._value /= self.N
135
136        self._dvalue = 0.0
137        self.ddvalue = 0.0
138        self.reweighted = False
139
140        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
175    def gamma_method(self, **kwargs):
176        """Estimate the error and related properties of the Obs.
177
178        Parameters
179        ----------
180        S : float
181            specifies a custom value for the parameter S (default 2.0).
182            If set to 0 it is assumed that the data exhibits no
183            autocorrelation. In this case the error estimates coincides
184            with the sample standard error.
185        tau_exp : float
186            positive value triggers the critical slowing down analysis
187            (default 0.0).
188        N_sigma : float
189            number of standard deviations from zero until the tail is
190            attached to the autocorrelation function (default 1).
191        fft : bool
192            determines whether the fft algorithm is used for the computation
193            of the autocorrelation function (default True)
194        """
195
196        e_content = self.e_content
197        self.e_dvalue = {}
198        self.e_ddvalue = {}
199        self.e_tauint = {}
200        self.e_dtauint = {}
201        self.e_windowsize = {}
202        self.e_n_tauint = {}
203        self.e_n_dtauint = {}
204        e_gamma = {}
205        self.e_rho = {}
206        self.e_drho = {}
207        self._dvalue = 0
208        self.ddvalue = 0
209
210        self.S = {}
211        self.tau_exp = {}
212        self.N_sigma = {}
213
214        if kwargs.get('fft') is False:
215            fft = False
216        else:
217            fft = True
218
219        def _parse_kwarg(kwarg_name):
220            if kwarg_name in kwargs:
221                tmp = kwargs.get(kwarg_name)
222                if isinstance(tmp, (int, float)):
223                    if tmp < 0:
224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
225                    for e, e_name in enumerate(self.e_names):
226                        getattr(self, kwarg_name)[e_name] = tmp
227                else:
228                    raise TypeError(kwarg_name + ' is not in proper format.')
229            else:
230                for e, e_name in enumerate(self.e_names):
231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
233                    else:
234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
235
236        _parse_kwarg('S')
237        _parse_kwarg('tau_exp')
238        _parse_kwarg('N_sigma')
239
240        for e, e_name in enumerate(self.mc_names):
241            r_length = []
242            for r_name in e_content[e_name]:
243                if isinstance(self.idl[r_name], range):
244                    r_length.append(len(self.idl[r_name]))
245                else:
246                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
247
248            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
249            w_max = max(r_length) // 2
250            e_gamma[e_name] = np.zeros(w_max)
251            self.e_rho[e_name] = np.zeros(w_max)
252            self.e_drho[e_name] = np.zeros(w_max)
253
254            for r_name in e_content[e_name]:
255                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
256
257            gamma_div = np.zeros(w_max)
258            for r_name in e_content[e_name]:
259                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
260            gamma_div[gamma_div < 1] = 1.0
261            e_gamma[e_name] /= gamma_div[:w_max]
262
263            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
264                self.e_tauint[e_name] = 0.5
265                self.e_dtauint[e_name] = 0.0
266                self.e_dvalue[e_name] = 0.0
267                self.e_ddvalue[e_name] = 0.0
268                self.e_windowsize[e_name] = 0
269                continue
270
271            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
272            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
273            # Make sure no entry of tauint is smaller than 0.5
274            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
275            # hep-lat/0306017 eq. (42)
276            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
277            self.e_n_dtauint[e_name][0] = 0.0
278
279            def _compute_drho(i):
280                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
281                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
282
283            _compute_drho(1)
284            if self.tau_exp[e_name] > 0:
285                texp = self.tau_exp[e_name]
286                # Critical slowing down analysis
287                if w_max // 2 <= 1:
288                    raise Exception("Need at least 8 samples for tau_exp error analysis")
289                for n in range(1, w_max // 2):
290                    _compute_drho(n + 1)
291                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
292                        # Bias correction hep-lat/0306017 eq. (49) included
293                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
294                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
295                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
296                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
297                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
298                        self.e_windowsize[e_name] = n
299                        break
300            else:
301                if self.S[e_name] == 0.0:
302                    self.e_tauint[e_name] = 0.5
303                    self.e_dtauint[e_name] = 0.0
304                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
305                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
306                    self.e_windowsize[e_name] = 0
307                else:
308                    # Standard automatic windowing procedure
309                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
310                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
311                    for n in range(1, w_max):
312                        if n < w_max // 2 - 2:
313                            _compute_drho(n + 1)
314                        if g_w[n - 1] < 0 or n >= w_max - 1:
315                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
316                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
317                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
318                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
319                            self.e_windowsize[e_name] = n
320                            break
321
322            self._dvalue += self.e_dvalue[e_name] ** 2
323            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
324
325        for e_name in self.cov_names:
326            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
327            self.e_ddvalue[e_name] = 0
328            self._dvalue += self.e_dvalue[e_name]**2
329
330        self._dvalue = np.sqrt(self._dvalue)
331        if self._dvalue == 0.0:
332            self.ddvalue = 0.0
333        else:
334            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
335        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
370    def details(self, ens_content=True):
371        """Output detailed properties of the Obs.
372
373        Parameters
374        ----------
375        ens_content : bool
376            print details about the ensembles and replica if true.
377        """
378        if self.tag is not None:
379            print("Description:", self.tag)
380        if not hasattr(self, 'e_dvalue'):
381            print('Result\t %3.8e' % (self.value))
382        else:
383            if self.value == 0.0:
384                percentage = np.nan
385            else:
386                percentage = np.abs(self._dvalue / self.value) * 100
387            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
388            if len(self.e_names) > 1:
389                print(' Ensemble errors:')
390            for e_name in self.mc_names:
391                if len(self.e_names) > 1:
392                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
393                if self.tau_exp[e_name] > 0:
394                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
395                else:
396                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
397            for e_name in self.cov_names:
398                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
399        if ens_content is True:
400            if len(self.e_names) == 1:
401                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
402            else:
403                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
404            my_string_list = []
405            for key, value in sorted(self.e_content.items()):
406                if key not in self.covobs:
407                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
408                    if len(value) == 1:
409                        my_string += f': {self.shape[value[0]]} configurations'
410                        if isinstance(self.idl[value[0]], range):
411                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
412                        else:
413                            my_string += ' (irregular range)'
414                    else:
415                        sublist = []
416                        for v in value:
417                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
418                            my_substring += f': {self.shape[v]} configurations'
419                            if isinstance(self.idl[v], range):
420                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
421                            else:
422                                my_substring += ' (irregular range)'
423                            sublist.append(my_substring)
424
425                        my_string += '\n' + '\n'.join(sublist)
426                else:
427                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
428                my_string_list.append(my_string)
429            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
431    def reweight(self, weight):
432        """Reweight the obs with given rewighting factors.
433
434        Parameters
435        ----------
436        weight : Obs
437            Reweighting factor. An Observable that has to be defined on a superset of the
438            configurations in obs[i].idl for all i.
439        all_configs : bool
440            if True, the reweighted observables are normalized by the average of
441            the reweighting factor on all configurations in weight.idl and not
442            on the configurations in obs[i].idl. Default False.
443        """
444        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
446    def is_zero_within_error(self, sigma=1):
447        """Checks whether the observable is zero within 'sigma' standard errors.
448
449        Parameters
450        ----------
451        sigma : int
452            Number of standard errors used for the check.
453
454        Works only properly when the gamma method was run.
455        """
456        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
458    def is_zero(self, atol=1e-10):
459        """Checks whether the observable is zero within a given tolerance.
460
461        Parameters
462        ----------
463        atol : float
464            Absolute tolerance (for details see numpy documentation).
465        """
466        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
468    def plot_tauint(self, save=None):
469        """Plot integrated autocorrelation time for each ensemble.
470
471        Parameters
472        ----------
473        save : str
474            saves the figure to a file named 'save' if.
475        """
476        if not hasattr(self, 'e_dvalue'):
477            raise Exception('Run the gamma method first.')
478
479        for e, e_name in enumerate(self.mc_names):
480            fig = plt.figure()
481            plt.xlabel(r'$W$')
482            plt.ylabel(r'$\tau_\mathrm{int}$')
483            length = int(len(self.e_n_tauint[e_name]))
484            if self.tau_exp[e_name] > 0:
485                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
486                x_help = np.arange(2 * self.tau_exp[e_name])
487                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
488                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
489                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
490                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
491                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
492                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
493                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
494            else:
495                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
496                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
497
498            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
499            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
500            plt.legend()
501            plt.xlim(-0.5, xmax)
502            ylim = plt.ylim()
503            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
504            plt.draw()
505            if save:
506                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
508    def plot_rho(self, save=None):
509        """Plot normalized autocorrelation function time for each ensemble.
510
511        Parameters
512        ----------
513        save : str
514            saves the figure to a file named 'save' if.
515        """
516        if not hasattr(self, 'e_dvalue'):
517            raise Exception('Run the gamma method first.')
518        for e, e_name in enumerate(self.mc_names):
519            fig = plt.figure()
520            plt.xlabel('W')
521            plt.ylabel('rho')
522            length = int(len(self.e_drho[e_name]))
523            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
524            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
525            if self.tau_exp[e_name] > 0:
526                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
527                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
528                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
529                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
530            else:
531                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
532                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
533            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
534            plt.xlim(-0.5, xmax)
535            plt.draw()
536            if save:
537                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
539    def plot_rep_dist(self):
540        """Plot replica distribution for each ensemble with more than one replicum."""
541        if not hasattr(self, 'e_dvalue'):
542            raise Exception('Run the gamma method first.')
543        for e, e_name in enumerate(self.mc_names):
544            if len(self.e_content[e_name]) == 1:
545                print('No replica distribution for a single replicum (', e_name, ')')
546                continue
547            r_length = []
548            sub_r_mean = 0
549            for r, r_name in enumerate(self.e_content[e_name]):
550                r_length.append(len(self.deltas[r_name]))
551                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
552            e_N = np.sum(r_length)
553            sub_r_mean /= e_N
554            arr = np.zeros(len(self.e_content[e_name]))
555            for r, r_name in enumerate(self.e_content[e_name]):
556                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
557            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
558            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
559            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
561    def plot_history(self, expand=True):
562        """Plot derived Monte Carlo history for each ensemble
563
564        Parameters
565        ----------
566        expand : bool
567            show expanded history for irregular Monte Carlo chains (default: True).
568        """
569        for e, e_name in enumerate(self.mc_names):
570            plt.figure()
571            r_length = []
572            tmp = []
573            tmp_expanded = []
574            for r, r_name in enumerate(self.e_content[e_name]):
575                tmp.append(self.deltas[r_name] + self.r_values[r_name])
576                if expand:
577                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
578                    r_length.append(len(tmp_expanded[-1]))
579                else:
580                    r_length.append(len(tmp[-1]))
581            e_N = np.sum(r_length)
582            x = np.arange(e_N)
583            y_test = np.concatenate(tmp, axis=0)
584            if expand:
585                y = np.concatenate(tmp_expanded, axis=0)
586            else:
587                y = y_test
588            plt.errorbar(x, y, fmt='.', markersize=3)
589            plt.xlim(-0.5, e_N - 0.5)
590            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
591            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
593    def plot_piechart(self, save=None):
594        """Plot piechart which shows the fractional contribution of each
595        ensemble to the error and returns a dictionary containing the fractions.
596
597        Parameters
598        ----------
599        save : str
600            saves the figure to a file named 'save' if.
601        """
602        if not hasattr(self, 'e_dvalue'):
603            raise Exception('Run the gamma method first.')
604        if np.isclose(0.0, self._dvalue, atol=1e-15):
605            raise Exception('Error is 0.0')
606        labels = self.e_names
607        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
608        fig1, ax1 = plt.subplots()
609        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
610        ax1.axis('equal')
611        plt.draw()
612        if save:
613            fig1.savefig(save)
614
615        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
617    def dump(self, filename, datatype="json.gz", description="", **kwargs):
618        """Dump the Obs to a file 'name' of chosen format.
619
620        Parameters
621        ----------
622        filename : str
623            name of the file to be saved.
624        datatype : str
625            Format of the exported file. Supported formats include
626            "json.gz" and "pickle"
627        description : str
628            Description for output file, only relevant for json.gz format.
629        path : str
630            specifies a custom path for the file (default '.')
631        """
632        if 'path' in kwargs:
633            file_name = kwargs.get('path') + '/' + filename
634        else:
635            file_name = filename
636
637        if datatype == "json.gz":
638            from .input.json import dump_to_json
639            dump_to_json([self], file_name, description=description)
640        elif datatype == "pickle":
641            with open(file_name + '.p', 'wb') as fb:
642                pickle.dump(self, fb)
643        else:
644            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
646    def export_jackknife(self):
647        """Export jackknife samples from the Obs
648
649        Returns
650        -------
651        numpy.ndarray
652            Returns a numpy array of length N + 1 where N is the number of samples
653            for the given ensemble and replicum. The zeroth entry of the array contains
654            the mean value of the Obs, entries 1 to N contain the N jackknife samples
655            derived from the Obs. The current implementation only works for observables
656            defined on exactly one ensemble and replicum. The derived jackknife samples
657            should agree with samples from a full jackknife analysis up to O(1/N).
658        """
659
660        if len(self.names) != 1:
661            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
662
663        name = self.names[0]
664        full_data = self.deltas[name] + self.r_values[name]
665        n = full_data.size
666        mean = self.value
667        tmp_jacks = np.zeros(n + 1)
668        tmp_jacks[0] = mean
669        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
670        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
806    def sqrt(self):
807        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
809    def log(self):
810        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
812    def exp(self):
813        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
815    def sin(self):
816        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
818    def cos(self):
819        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
821    def tan(self):
822        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
824    def arcsin(self):
825        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
827    def arccos(self):
828        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
830    def arctan(self):
831        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
833    def sinh(self):
834        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
836    def cosh(self):
837        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
839    def tanh(self):
840        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
842    def arcsinh(self):
843        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
845    def arccosh(self):
846        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
848    def arctanh(self):
849        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
852class CObs:
853    """Class for a complex valued observable."""
854    __slots__ = ['_real', '_imag', 'tag']
855
856    def __init__(self, real, imag=0.0):
857        self._real = real
858        self._imag = imag
859        self.tag = None
860
861    @property
862    def real(self):
863        return self._real
864
865    @property
866    def imag(self):
867        return self._imag
868
869    def gamma_method(self, **kwargs):
870        """Executes the gamma_method for the real and the imaginary part."""
871        if isinstance(self.real, Obs):
872            self.real.gamma_method(**kwargs)
873        if isinstance(self.imag, Obs):
874            self.imag.gamma_method(**kwargs)
875
876    def is_zero(self):
877        """Checks whether both real and imaginary part are zero within machine precision."""
878        return self.real == 0.0 and self.imag == 0.0
879
880    def conjugate(self):
881        return CObs(self.real, -self.imag)
882
883    def __add__(self, other):
884        if isinstance(other, np.ndarray):
885            return other + self
886        elif hasattr(other, 'real') and hasattr(other, 'imag'):
887            return CObs(self.real + other.real,
888                        self.imag + other.imag)
889        else:
890            return CObs(self.real + other, self.imag)
891
892    def __radd__(self, y):
893        return self + y
894
895    def __sub__(self, other):
896        if isinstance(other, np.ndarray):
897            return -1 * (other - self)
898        elif hasattr(other, 'real') and hasattr(other, 'imag'):
899            return CObs(self.real - other.real, self.imag - other.imag)
900        else:
901            return CObs(self.real - other, self.imag)
902
903    def __rsub__(self, other):
904        return -1 * (self - other)
905
906    def __mul__(self, other):
907        if isinstance(other, np.ndarray):
908            return other * self
909        elif hasattr(other, 'real') and hasattr(other, 'imag'):
910            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
911                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
912                                               [self.real, other.real, self.imag, other.imag],
913                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
914                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
915                                               [self.real, other.real, self.imag, other.imag],
916                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
917            elif getattr(other, 'imag', 0) != 0:
918                return CObs(self.real * other.real - self.imag * other.imag,
919                            self.imag * other.real + self.real * other.imag)
920            else:
921                return CObs(self.real * other.real, self.imag * other.real)
922        else:
923            return CObs(self.real * other, self.imag * other)
924
925    def __rmul__(self, other):
926        return self * other
927
928    def __truediv__(self, other):
929        if isinstance(other, np.ndarray):
930            return 1 / (other / self)
931        elif hasattr(other, 'real') and hasattr(other, 'imag'):
932            r = other.real ** 2 + other.imag ** 2
933            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
934        else:
935            return CObs(self.real / other, self.imag / other)
936
937    def __rtruediv__(self, other):
938        r = self.real ** 2 + self.imag ** 2
939        if hasattr(other, 'real') and hasattr(other, 'imag'):
940            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
941        else:
942            return CObs(self.real * other / r, -self.imag * other / r)
943
944    def __abs__(self):
945        return np.sqrt(self.real**2 + self.imag**2)
946
947    def __pos__(self):
948        return self
949
950    def __neg__(self):
951        return -1 * self
952
953    def __eq__(self, other):
954        return self.real == other.real and self.imag == other.imag
955
956    def __str__(self):
957        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
958
959    def __repr__(self):
960        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
856    def __init__(self, real, imag=0.0):
857        self._real = real
858        self._imag = imag
859        self.tag = None
def gamma_method(self, **kwargs):
869    def gamma_method(self, **kwargs):
870        """Executes the gamma_method for the real and the imaginary part."""
871        if isinstance(self.real, Obs):
872            self.real.gamma_method(**kwargs)
873        if isinstance(self.imag, Obs):
874            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
876    def is_zero(self):
877        """Checks whether both real and imaginary part are zero within machine precision."""
878        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
880    def conjugate(self):
881        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
1129def derived_observable(func, data, array_mode=False, **kwargs):
1130    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1131
1132    Parameters
1133    ----------
1134    func : object
1135        arbitrary function of the form func(data, **kwargs). For the
1136        automatic differentiation to work, all numpy functions have to have
1137        the autograd wrapper (use 'import autograd.numpy as anp').
1138    data : list
1139        list of Obs, e.g. [obs1, obs2, obs3].
1140    num_grad : bool
1141        if True, numerical derivatives are used instead of autograd
1142        (default False). To control the numerical differentiation the
1143        kwargs of numdifftools.step_generators.MaxStepGenerator
1144        can be used.
1145    man_grad : list
1146        manually supply a list or an array which contains the jacobian
1147        of func. Use cautiously, supplying the wrong derivative will
1148        not be intercepted.
1149
1150    Notes
1151    -----
1152    For simple mathematical operations it can be practical to use anonymous
1153    functions. For the ratio of two observables one can e.g. use
1154
1155    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1156    """
1157
1158    data = np.asarray(data)
1159    raveled_data = data.ravel()
1160
1161    # Workaround for matrix operations containing non Obs data
1162    if not all(isinstance(x, Obs) for x in raveled_data):
1163        for i in range(len(raveled_data)):
1164            if isinstance(raveled_data[i], (int, float)):
1165                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1166
1167    allcov = {}
1168    for o in raveled_data:
1169        for name in o.cov_names:
1170            if name in allcov:
1171                if not np.allclose(allcov[name], o.covobs[name].cov):
1172                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1173            else:
1174                allcov[name] = o.covobs[name].cov
1175
1176    n_obs = len(raveled_data)
1177    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1178    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1179    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1180
1181    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1182    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1183
1184    if data.ndim == 1:
1185        values = np.array([o.value for o in data])
1186    else:
1187        values = np.vectorize(lambda x: x.value)(data)
1188
1189    new_values = func(values, **kwargs)
1190
1191    multi = int(isinstance(new_values, np.ndarray))
1192
1193    new_r_values = {}
1194    new_idl_d = {}
1195    for name in new_sample_names:
1196        idl = []
1197        tmp_values = np.zeros(n_obs)
1198        for i, item in enumerate(raveled_data):
1199            tmp_values[i] = item.r_values.get(name, item.value)
1200            tmp_idl = item.idl.get(name)
1201            if tmp_idl is not None:
1202                idl.append(tmp_idl)
1203        if multi > 0:
1204            tmp_values = np.array(tmp_values).reshape(data.shape)
1205        new_r_values[name] = func(tmp_values, **kwargs)
1206        new_idl_d[name] = _merge_idx(idl)
1207        if not is_merged[name]:
1208            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1209
1210    if 'man_grad' in kwargs:
1211        deriv = np.asarray(kwargs.get('man_grad'))
1212        if new_values.shape + data.shape != deriv.shape:
1213            raise Exception('Manual derivative does not have correct shape.')
1214    elif kwargs.get('num_grad') is True:
1215        if multi > 0:
1216            raise Exception('Multi mode currently not supported for numerical derivative')
1217        options = {
1218            'base_step': 0.1,
1219            'step_ratio': 2.5}
1220        for key in options.keys():
1221            kwarg = kwargs.get(key)
1222            if kwarg is not None:
1223                options[key] = kwarg
1224        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1225        if tmp_df.size == 1:
1226            deriv = np.array([tmp_df.real])
1227        else:
1228            deriv = tmp_df.real
1229    else:
1230        deriv = jacobian(func)(values, **kwargs)
1231
1232    final_result = np.zeros(new_values.shape, dtype=object)
1233
1234    if array_mode is True:
1235
1236        class _Zero_grad():
1237            def __init__(self, N):
1238                self.grad = np.zeros((N, 1))
1239
1240        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1241        d_extracted = {}
1242        g_extracted = {}
1243        for name in new_sample_names:
1244            d_extracted[name] = []
1245            ens_length = len(new_idl_d[name])
1246            for i_dat, dat in enumerate(data):
1247                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1248        for name in new_cov_names:
1249            g_extracted[name] = []
1250            zero_grad = _Zero_grad(new_covobs_lengths[name])
1251            for i_dat, dat in enumerate(data):
1252                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1253
1254    for i_val, new_val in np.ndenumerate(new_values):
1255        new_deltas = {}
1256        new_grad = {}
1257        if array_mode is True:
1258            for name in new_sample_names:
1259                ens_length = d_extracted[name][0].shape[-1]
1260                new_deltas[name] = np.zeros(ens_length)
1261                for i_dat, dat in enumerate(d_extracted[name]):
1262                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1263            for name in new_cov_names:
1264                new_grad[name] = 0
1265                for i_dat, dat in enumerate(g_extracted[name]):
1266                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1267        else:
1268            for j_obs, obs in np.ndenumerate(data):
1269                for name in obs.names:
1270                    if name in obs.cov_names:
1271                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1272                    else:
1273                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1274
1275        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1276
1277        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1278            raise Exception('The same name has been used for deltas and covobs!')
1279        new_samples = []
1280        new_means = []
1281        new_idl = []
1282        new_names_obs = []
1283        for name in new_names:
1284            if name not in new_covobs:
1285                if is_merged[name]:
1286                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1287                else:
1288                    filtered_deltas = new_deltas[name]
1289                    filtered_idl_d = new_idl_d[name]
1290
1291                new_samples.append(filtered_deltas)
1292                new_idl.append(filtered_idl_d)
1293                new_means.append(new_r_values[name][i_val])
1294                new_names_obs.append(name)
1295        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1296        for name in new_covobs:
1297            final_result[i_val].names.append(name)
1298        final_result[i_val]._covobs = new_covobs
1299        final_result[i_val]._value = new_val
1300        final_result[i_val].is_merged = is_merged
1301        final_result[i_val].reweighted = reweighted
1302
1303    if multi == 0:
1304        final_result = final_result.item()
1305
1306    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1346def reweight(weight, obs, **kwargs):
1347    """Reweight a list of observables.
1348
1349    Parameters
1350    ----------
1351    weight : Obs
1352        Reweighting factor. An Observable that has to be defined on a superset of the
1353        configurations in obs[i].idl for all i.
1354    obs : list
1355        list of Obs, e.g. [obs1, obs2, obs3].
1356    all_configs : bool
1357        if True, the reweighted observables are normalized by the average of
1358        the reweighting factor on all configurations in weight.idl and not
1359        on the configurations in obs[i].idl. Default False.
1360    """
1361    result = []
1362    for i in range(len(obs)):
1363        if len(obs[i].cov_names):
1364            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1365        if not set(obs[i].names).issubset(weight.names):
1366            raise Exception('Error: Ensembles do not fit')
1367        for name in obs[i].names:
1368            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1369                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1370        new_samples = []
1371        w_deltas = {}
1372        for name in sorted(obs[i].names):
1373            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1374            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1375        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1376
1377        if kwargs.get('all_configs'):
1378            new_weight = weight
1379        else:
1380            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1381
1382        result.append(tmp_obs / new_weight)
1383        result[-1].reweighted = True
1384        result[-1].is_merged = obs[i].is_merged
1385
1386    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1389def correlate(obs_a, obs_b):
1390    """Correlate two observables.
1391
1392    Parameters
1393    ----------
1394    obs_a : Obs
1395        First observable
1396    obs_b : Obs
1397        Second observable
1398
1399    Notes
1400    -----
1401    Keep in mind to only correlate primary observables which have not been reweighted
1402    yet. The reweighting has to be applied after correlating the observables.
1403    Currently only works if ensembles are identical (this is not strictly necessary).
1404    """
1405
1406    if sorted(obs_a.names) != sorted(obs_b.names):
1407        raise Exception('Ensembles do not fit')
1408    if len(obs_a.cov_names) or len(obs_b.cov_names):
1409        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1410    for name in obs_a.names:
1411        if obs_a.shape[name] != obs_b.shape[name]:
1412            raise Exception('Shapes of ensemble', name, 'do not fit')
1413        if obs_a.idl[name] != obs_b.idl[name]:
1414            raise Exception('idl of ensemble', name, 'do not fit')
1415
1416    if obs_a.reweighted is True:
1417        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1418    if obs_b.reweighted is True:
1419        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1420
1421    new_samples = []
1422    new_idl = []
1423    for name in sorted(obs_a.names):
1424        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1425        new_idl.append(obs_a.idl[name])
1426
1427    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1428    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1429    o.reweighted = obs_a.reweighted or obs_b.reweighted
1430    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1433def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1434    r'''Calculates the error covariance matrix of a set of observables.
1435
1436    The gamma method has to be applied first to all observables.
1437
1438    Parameters
1439    ----------
1440    obs : list or numpy.ndarray
1441        List or one dimensional array of Obs
1442    visualize : bool
1443        If True plots the corresponding normalized correlation matrix (default False).
1444    correlation : bool
1445        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1446    smooth : None or int
1447        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1448        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1449        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1450        small ones.
1451
1452    Notes
1453    -----
1454    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1455    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1456    in the absence of autocorrelation.
1457    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1458    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1459    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1460    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1461    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1462    '''
1463
1464    length = len(obs)
1465
1466    max_samples = np.max([o.N for o in obs])
1467    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1468        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1469
1470    cov = np.zeros((length, length))
1471    for i in range(length):
1472        for j in range(i, length):
1473            cov[i, j] = _covariance_element(obs[i], obs[j])
1474    cov = cov + cov.T - np.diag(np.diag(cov))
1475
1476    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1477
1478    if isinstance(smooth, int):
1479        corr = _smooth_eigenvalues(corr, smooth)
1480
1481    if visualize:
1482        plt.matshow(corr, vmin=-1, vmax=1)
1483        plt.set_cmap('RdBu')
1484        plt.colorbar()
1485        plt.draw()
1486
1487    if correlation is True:
1488        return corr
1489
1490    errors = [o.dvalue for o in obs]
1491    cov = np.diag(errors) @ corr @ np.diag(errors)
1492
1493    eigenvalues = np.linalg.eigh(cov)[0]
1494    if not np.all(eigenvalues >= 0):
1495        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1496
1497    return cov

Calculates the error covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1577def import_jackknife(jacks, name, idl=None):
1578    """Imports jackknife samples and returns an Obs
1579
1580    Parameters
1581    ----------
1582    jacks : numpy.ndarray
1583        numpy array containing the mean value as zeroth entry and
1584        the N jackknife samples as first to Nth entry.
1585    name : str
1586        name of the ensemble the samples are defined on.
1587    """
1588    length = len(jacks) - 1
1589    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1590    samples = jacks[1:] @ prj
1591    mean = np.mean(samples)
1592    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1593    new_obs._value = jacks[0]
1594    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
1597def merge_obs(list_of_obs):
1598    """Combine all observables in list_of_obs into one new observable
1599
1600    Parameters
1601    ----------
1602    list_of_obs : list
1603        list of the Obs object to be combined
1604
1605    Notes
1606    -----
1607    It is not possible to combine obs which are based on the same replicum
1608    """
1609    replist = [item for obs in list_of_obs for item in obs.names]
1610    if (len(replist) == len(set(replist))) is False:
1611        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1612    if any([len(o.cov_names) for o in list_of_obs]):
1613        raise Exception('Not possible to merge data that contains covobs!')
1614    new_dict = {}
1615    idl_dict = {}
1616    for o in list_of_obs:
1617        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1618                        for key in set(o.deltas) | set(o.r_values)})
1619        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1620
1621    names = sorted(new_dict.keys())
1622    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1623    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1624    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1625    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1628def cov_Obs(means, cov, name, grad=None):
1629    """Create an Obs based on mean(s) and a covariance matrix
1630
1631    Parameters
1632    ----------
1633    mean : list of floats or float
1634        N mean value(s) of the new Obs
1635    cov : list or array
1636        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1637    name : str
1638        identifier for the covariance matrix
1639    grad : list or array
1640        Gradient of the Covobs wrt. the means belonging to cov.
1641    """
1642
1643    def covobs_to_obs(co):
1644        """Make an Obs out of a Covobs
1645
1646        Parameters
1647        ----------
1648        co : Covobs
1649            Covobs to be embedded into the Obs
1650        """
1651        o = Obs([], [], means=[])
1652        o._value = co.value
1653        o.names.append(co.name)
1654        o._covobs[co.name] = co
1655        o._dvalue = np.sqrt(co.errsq())
1656        return o
1657
1658    ol = []
1659    if isinstance(means, (float, int)):
1660        means = [means]
1661
1662    for i in range(len(means)):
1663        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1664    if ol[0].covobs[name].N != len(means):
1665        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1666    if len(ol) == 1:
1667        return ol[0]
1668    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.