pyerrors.obs

   1import warnings
   2import pickle
   3import numpy as np
   4import autograd.numpy as anp  # Thinly-wrapped numpy
   5from autograd import jacobian
   6import matplotlib.pyplot as plt
   7from scipy.stats import skew, skewtest, kurtosis, kurtosistest
   8import numdifftools as nd
   9from itertools import groupby
  10from .covobs import Covobs
  11
  12
  13class Obs:
  14    """Class for a general observable.
  15
  16    Instances of Obs are the basic objects of a pyerrors error analysis.
  17    They are initialized with a list which contains arrays of samples for
  18    different ensembles/replica and another list of same length which contains
  19    the names of the ensembles/replica. Mathematical operations can be
  20    performed on instances. The result is another instance of Obs. The error of
  21    an instance can be computed with the gamma_method. Also contains additional
  22    methods for output and visualization of the error calculation.
  23
  24    Attributes
  25    ----------
  26    S_global : float
  27        Standard value for S (default 2.0)
  28    S_dict : dict
  29        Dictionary for S values. If an entry for a given ensemble
  30        exists this overwrites the standard value for that ensemble.
  31    tau_exp_global : float
  32        Standard value for tau_exp (default 0.0)
  33    tau_exp_dict : dict
  34        Dictionary for tau_exp values. If an entry for a given ensemble exists
  35        this overwrites the standard value for that ensemble.
  36    N_sigma_global : float
  37        Standard value for N_sigma (default 1.0)
  38    N_sigma_dict : dict
  39        Dictionary for N_sigma values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    """
    # Restrict instances to the listed attributes.  '__dict__' is included in
    # the list, so dynamically added attributes still work; the slots mainly
    # serve as documentation of the expected attribute set.
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']

    # Class-level defaults for the gamma method; per-ensemble overrides live
    # in the corresponding *_dict (see class docstring).
    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
    # NOTE(review): filter_eps is not referenced in this chunk; presumably a
    # numerical tolerance used elsewhere in the module — confirm before relying on it.
    filter_eps = 1e-10
  55
    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined

        Raises
        ------
        Exception
            If the lengths of samples, names and idl are inconsistent, names
            are not unique, any sample has fewer than 5 entries, or an idl
            entry is unsorted or of unsupported type.
        TypeError
            If any name is not a string.
        """

        # Internal fast path: when the 'means' keyword is supplied, the input
        # checks are skipped and 'samples' are taken to be the deltas
        # (fluctuations around the given means) directly.
        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.is_merged = {}
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        # Equidistant configurations: store compactly as a range.
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            # Default: samples live on consecutive configurations 1..len(sample).
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            # Fast path: 'samples' already are the deltas, means are given.
            # NOTE(review): _value stays 0 on this path — presumably set by
            # the caller afterwards; confirm against call sites.
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            # Central value is the weighted average of the replica means.
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None
 137
    @property
    def value(self):
        """Central value of the observable (read-only)."""
        return self._value
 141
    @property
    def dvalue(self):
        """Error estimate of the observable (read-only).

        0.0 until gamma_method has been run."""
        return self._dvalue
 145
 146    @property
 147    def e_names(self):
 148        return sorted(set([o.split('|')[0] for o in self.names]))
 149
 150    @property
 151    def cov_names(self):
 152        return sorted(set([o for o in self.covobs.keys()]))
 153
 154    @property
 155    def mc_names(self):
 156        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 157
 158    @property
 159    def e_content(self):
 160        res = {}
 161        for e, e_name in enumerate(self.e_names):
 162            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 163            if e_name in self.names:
 164                res[e_name].append(e_name)
 165        return res
 166
    @property
    def covobs(self):
        """Dictionary of Covobs contributions, keyed by name (read-only)."""
        return self._covobs
 170
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Implements the Gamma-method of hep-lat/0306017: the autocorrelation
        function of each Monte Carlo ensemble is summed up to an automatically
        (or tau_exp-) chosen window to obtain tau_int and the error.  Covobs
        contributions are added in quadrature at the end.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        # Reset all per-ensemble results of any previous run.
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Resolve S / tau_exp / N_sigma per ensemble: an explicit kwarg
            # beats the class-level *_dict, which beats the *_global default.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    # Irregular chains count the full spanned index range.
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Accumulate the unnormalized autocorrelation over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalize by the number of contributing configuration pairs per
            # window (accounts for gaps in irregular chains).
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Constant data: no fluctuations, error is exactly zero.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Error estimate of the normalized autocorrelation function at
                # lag i (cf. hep-lat/0306017).
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Attach the exponential tail once rho is compatible with
                    # zero within N_sigma, or at the latest possible window.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S == 0: assume no autocorrelation, plain standard error.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        # First sign change of g_w (or the last window) fixes
                        # the summation window W = n.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Add this ensemble's contribution in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
 332
    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.

        Returns
        -------
        numpy.ndarray
            Unnormalized autocorrelation function Gamma(W) for W < w_max.
        """
        gamma = np.zeros(w_max)
        # Map the deltas onto a contiguous index grid via the module-level
        # helper so that lags correspond to actual configuration distances.
        deltas = _expand_deltas(deltas, idx, shape)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            # Autocorrelation via the power spectrum of the zero-padded deltas
            # (Wiener-Khinchin theorem).
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            # Direct evaluation of the lagged scalar products.
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma
 365
    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            # gamma_method has not been run yet: only the mean is available.
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            for e_name in self.mc_names:
                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                if self.tau_exp[e_name] > 0:
                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
                else:
                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += ' (irregular range)'
                    else:
                        # Ensemble with several replica: list each replicum on
                        # its own line, name shown without the ensemble prefix.
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += ' (irregular range)'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))
 426
 427    def is_zero_within_error(self, sigma=1):
 428        """Checks whether the observable is zero within 'sigma' standard errors.
 429
 430        Parameters
 431        ----------
 432        sigma : int
 433            Number of standard errors used for the check.
 434
 435        Works only properly when the gamma method was run.
 436        """
 437        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 438
 439    def is_zero(self, atol=1e-10):
 440        """Checks whether the observable is zero within a given tolerance.
 441
 442        Parameters
 443        ----------
 444        atol : float
 445            Absolute tolerance (for details see numpy documentation).
 446        """
 447        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 448
    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if.

        Raises
        ------
        Exception
            If the gamma method was not run beforehand.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                # Visualize the attached tail of the critical slowing down
                # analysis as a dashed line plus the final tau_int estimate.
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            # Mark the chosen summation window.
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                # One file per ensemble, suffixed with the ensemble index.
                fig.savefig(save + "_" + str(e))
 488
    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if.

        Raises
        ------
        Exception
            If the gamma method was not run beforehand.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            # Mark the chosen summation window.
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                # Indicate the attached tail of the critical slowing down
                # analysis by a straight line towards zero.
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                # One file per ensemble, suffixed with the ensemble index.
                fig.savefig(save + "_" + str(e))
 519
    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum.

        Raises
        ------
        Exception
            If the gamma method was not run beforehand.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            # Normalized deviation of each replica mean from the ensemble mean.
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
            plt.draw()
 541
    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                # Reconstruct the per-configuration values from deltas + mean.
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    # Map the deltas onto a contiguous configuration grid via
                    # the module-level helper before plotting.
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            # Skewness/kurtosis statistics are computed on the unexpanded data.
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()
 573
 574    def plot_piechart(self, save=None):
 575        """Plot piechart which shows the fractional contribution of each
 576        ensemble to the error and returns a dictionary containing the fractions.
 577
 578        Parameters
 579        ----------
 580        save : str
 581            saves the figure to a file named 'save' if.
 582        """
 583        if not hasattr(self, 'e_dvalue'):
 584            raise Exception('Run the gamma method first.')
 585        if np.isclose(0.0, self._dvalue, atol=1e-15):
 586            raise Exception('Error is 0.0')
 587        labels = self.e_names
 588        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 589        fig1, ax1 = plt.subplots()
 590        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 591        ax1.axis('equal')
 592        plt.draw()
 593        if save:
 594            fig1.savefig(save)
 595
 596        return dict(zip(self.e_names, sizes))
 597
 598    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 599        """Dump the Obs to a file 'name' of chosen format.
 600
 601        Parameters
 602        ----------
 603        filename : str
 604            name of the file to be saved.
 605        datatype : str
 606            Format of the exported file. Supported formats include
 607            "json.gz" and "pickle"
 608        description : str
 609            Description for output file, only relevant for json.gz format.
 610        path : str
 611            specifies a custom path for the file (default '.')
 612        """
 613        if 'path' in kwargs:
 614            file_name = kwargs.get('path') + '/' + filename
 615        else:
 616            file_name = filename
 617
 618        if datatype == "json.gz":
 619            from .input.json import dump_to_json
 620            dump_to_json([self], file_name, description=description)
 621        elif datatype == "pickle":
 622            with open(file_name + '.p', 'wb') as fb:
 623                pickle.dump(self, fb)
 624        else:
 625            raise Exception("Unknown datatype " + str(datatype))
 626
 627    def export_jackknife(self):
 628        """Export jackknife samples from the Obs
 629
 630        Returns
 631        -------
 632        numpy.ndarray
 633            Returns a numpy array of length N + 1 where N is the number of samples
 634            for the given ensemble and replicum. The zeroth entry of the array contains
 635            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 636            derived from the Obs. The current implementation only works for observables
 637            defined on exactly one ensemble and replicum. The derived jackknife samples
 638            should agree with samples from a full jackknife analysis up to O(1/N).
 639        """
 640
 641        if len(self.names) != 1:
 642            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 643
 644        name = self.names[0]
 645        full_data = self.deltas[name] + self.r_values[name]
 646        n = full_data.size
 647        mean = self.value
 648        tmp_jacks = np.zeros(n + 1)
 649        tmp_jacks[0] = mean
 650        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 651        return tmp_jacks
 652
    def __float__(self):
        """Conversion to float returns the central value; the error is lost."""
        return float(self.value)
 655
    def __repr__(self):
        """Return 'Obs[...]' wrapping the compact value(error) string from __str__."""
        return 'Obs[' + str(self) + ']'
 658
    def __str__(self):
        """Return the central value with its error in compact parenthesis notation, e.g. '1.234(56)'.

        If the error is exactly zero only the value is printed. The number of
        displayed digits is derived from the order of magnitude of the error.
        """
        if self._dvalue == 0.0:
            return str(self.value)
        # Order of magnitude of the error determines the formatting branch.
        fexp = np.floor(np.log10(self._dvalue))
        if fexp < 0.0:
            # Error < 1: show two significant digits of the error in parentheses.
            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
        elif fexp == 0.0:
            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
        else:
            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 669
 670    # Overload comparisons
    def __lt__(self, other):
        """Compare the central value of the Obs with other using '<' (errors are ignored)."""
        return self.value < other
 673
    def __le__(self, other):
        """Compare the central value of the Obs with other using '<=' (errors are ignored)."""
        return self.value <= other
 676
    def __gt__(self, other):
        """Compare the central value of the Obs with other using '>' (errors are ignored)."""
        return self.value > other
 679
    def __ge__(self, other):
        """Compare the central value of the Obs with other using '>=' (errors are ignored)."""
        return self.value >= other
 682
    def __eq__(self, other):
        """Two objects compare equal if their difference is zero (checked via Obs.is_zero)."""
        return (self - other).is_zero()
 685
    def __ne__(self, other):
        """Negation of __eq__: True if the difference is not zero (checked via Obs.is_zero)."""
        return not (self - other).is_zero()
 688
 689    # Overload math operations
 690    def __add__(self, y):
 691        if isinstance(y, Obs):
 692            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 693        else:
 694            if isinstance(y, np.ndarray):
 695                return np.array([self + o for o in y])
 696            elif y.__class__.__name__ in ['Corr', 'CObs']:
 697                return NotImplemented
 698            else:
 699                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 700
    def __radd__(self, y):
        """Reflected addition; addition is commutative so delegate to __add__."""
        return self + y
 703
 704    def __mul__(self, y):
 705        if isinstance(y, Obs):
 706            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 707        else:
 708            if isinstance(y, np.ndarray):
 709                return np.array([self * o for o in y])
 710            elif isinstance(y, complex):
 711                return CObs(self * y.real, self * y.imag)
 712            elif y.__class__.__name__ in ['Corr', 'CObs']:
 713                return NotImplemented
 714            else:
 715                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 716
    def __rmul__(self, y):
        """Reflected multiplication; multiplication is commutative so delegate to __mul__."""
        return self * y
 719
 720    def __sub__(self, y):
 721        if isinstance(y, Obs):
 722            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 723        else:
 724            if isinstance(y, np.ndarray):
 725                return np.array([self - o for o in y])
 726            elif y.__class__.__name__ in ['Corr', 'CObs']:
 727                return NotImplemented
 728            else:
 729                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 730
    def __rsub__(self, y):
        """Reflected subtraction: y - self, implemented as -(self - y)."""
        return -1 * (self - y)
 733
    def __pos__(self):
        """Unary plus: return the Obs itself unchanged."""
        return self
 736
    def __neg__(self):
        """Unary minus: return the Obs multiplied by -1."""
        return -1 * self
 739
 740    def __truediv__(self, y):
 741        if isinstance(y, Obs):
 742            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 743        else:
 744            if isinstance(y, np.ndarray):
 745                return np.array([self / o for o in y])
 746            elif y.__class__.__name__ in ['Corr', 'CObs']:
 747                return NotImplemented
 748            else:
 749                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 750
 751    def __rtruediv__(self, y):
 752        if isinstance(y, Obs):
 753            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 754        else:
 755            if isinstance(y, np.ndarray):
 756                return np.array([o / self for o in y])
 757            elif y.__class__.__name__ in ['Corr', 'CObs']:
 758                return NotImplemented
 759            else:
 760                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 761
 762    def __pow__(self, y):
 763        if isinstance(y, Obs):
 764            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 765        else:
 766            return derived_observable(lambda x: x[0] ** y, [self])
 767
 768    def __rpow__(self, y):
 769        if isinstance(y, Obs):
 770            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 771        else:
 772            return derived_observable(lambda x: y ** x[0], [self])
 773
    def __abs__(self):
        """Absolute value of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.abs(x[0]), [self])
 776
 777    # Overload numpy functions
    def sqrt(self):
        """Square root of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 780
    def log(self):
        """Natural logarithm of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 783
    def exp(self):
        """Exponential of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 786
    def sin(self):
        """Sine of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 789
    def cos(self):
        """Cosine of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 792
    def tan(self):
        """Tangent of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 795
    def arcsin(self):
        """Inverse sine of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 798
    def arccos(self):
        """Inverse cosine of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 801
    def arctan(self):
        """Inverse tangent of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 804
    def sinh(self):
        """Hyperbolic sine of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 807
    def cosh(self):
        """Hyperbolic cosine of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 810
    def tanh(self):
        """Hyperbolic tangent of the Obs with manually supplied gradient."""
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 813
    def arcsinh(self):
        """Inverse hyperbolic sine of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 816
    def arccosh(self):
        """Inverse hyperbolic cosine of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 819
    def arctanh(self):
        """Inverse hyperbolic tangent of the Obs; the gradient is obtained via autograd."""
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 822
 823
class CObs:
    """Class for a complex valued observable.

    Real and imaginary part are stored separately; they are typically Obs
    instances but plain numbers are also accepted. Arithmetic is delegated
    to the parts, so errors propagate through the Obs machinery.
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        """Real part of the complex observable."""
        return self._real

    @property
    def imag(self):
        """Imaginary part of the complex observable."""
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Returns the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        """Addition; ndarrays are handled elementwise, anything with real/imag attributes componentwise."""
        if isinstance(other, np.ndarray):
            # Delegate to numpy's elementwise broadcasting.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        """Reflected addition; addition is commutative so delegate to __add__."""
        return self + y

    def __sub__(self, other):
        """Subtraction; ndarrays are handled elementwise, anything with real/imag attributes componentwise."""
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        """Reflected subtraction: other - self, implemented as -(self - other)."""
        return -1 * (self - other)

    def __mul__(self, other):
        """Complex multiplication with error propagation where possible."""
        if isinstance(other, np.ndarray):
            # Delegate to numpy's elementwise broadcasting.
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # If all four components are Obs, supply the product rule
            # gradients manually instead of relying on autograd.
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                # Generic complex-by-complex product.
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # other is effectively real-valued.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        """Reflected multiplication; multiplication is commutative so delegate to __mul__."""
        return self * other

    def __truediv__(self, other):
        """Complex division via multiplication with the conjugate over the squared modulus."""
        if isinstance(other, np.ndarray):
            # Delegate elementwise, then invert the result.
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        """Reflected division: other / self, using the conjugate over the squared modulus of self."""
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        """Modulus sqrt(real**2 + imag**2)."""
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        """Unary plus: return the CObs itself unchanged."""
        return self

    def __neg__(self):
        """Unary minus: return the CObs multiplied by -1."""
        return -1 * self

    def __eq__(self, other):
        """Componentwise equality of real and imaginary parts."""
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
 933
 934
 935def _expand_deltas(deltas, idx, shape):
 936    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 937       If idx is of type range, the deltas are not changed
 938
 939    Parameters
 940    ----------
 941    deltas : list
 942        List of fluctuations
 943    idx : list
 944        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 945    shape : int
 946        Number of configs in idx.
 947    """
 948    if isinstance(idx, range):
 949        return deltas
 950    else:
 951        ret = np.zeros(idx[-1] - idx[0] + 1)
 952        for i in range(shape):
 953            ret[idx[i] - idx[0]] = deltas[i]
 954        return ret
 955
 956
 957def _merge_idx(idl):
 958    """Returns the union of all lists in idl as sorted list
 959
 960    Parameters
 961    ----------
 962    idl : list
 963        List of lists or ranges.
 964    """
 965
 966    # Use groupby to efficiently check whether all elements of idl are identical
 967    try:
 968        g = groupby(idl)
 969        if next(g, True) and not next(g, False):
 970            return idl[0]
 971    except Exception:
 972        pass
 973
 974    if np.all([type(idx) is range for idx in idl]):
 975        if len(set([idx[0] for idx in idl])) == 1:
 976            idstart = min([idx.start for idx in idl])
 977            idstop = max([idx.stop for idx in idl])
 978            idstep = min([idx.step for idx in idl])
 979            return range(idstart, idstop, idstep)
 980
 981    return sorted(set().union(*idl))
 982
 983
 984def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
 985    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
 986       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
 987       common divisor of the step sizes is used as new step size.
 988
 989    Parameters
 990    ----------
 991    deltas : list
 992        List of fluctuations
 993    idx : list
 994        List or range of configs on which the deltas are defined.
 995        Has to be a subset of new_idx and has to be sorted in ascending order.
 996    shape : list
 997        Number of configs in idx.
 998    new_idx : list
 999        List of configs that defines the new range, has to be sorted in ascending order.
1000    """
1001
1002    if type(idx) is range and type(new_idx) is range:
1003        if idx == new_idx:
1004            return deltas
1005    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1006    for i in range(shape):
1007        ret[idx[i] - new_idx[0]] = deltas[i]
1008    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1009
1010
def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
    """Filter out all configurations with vanishing fluctuation such that they do not
       contribute to the error estimate anymore. Returns the new deltas and
       idx according to the filtering.
       A fluctuation is considered to be vanishing, if it is smaller than eps times
       the mean of the absolute values of all deltas in one list.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or ranges of configs on which the deltas are defined.
    eps : float
        Prefactor that enters the filter criterion.
    """
    threshold = eps * np.mean(np.fabs(deltas))
    kept_deltas = []
    kept_idx = []
    for pos, delta in enumerate(deltas):
        if abs(delta) > threshold:
            kept_deltas.append(delta)
            kept_idx.append(idx[pos])
    # If everything would be filtered away, keep the original data instead.
    if kept_idx:
        return np.array(kept_deltas), kept_idx
    else:
        return deltas, idx
1038
1039
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the deltas and covobs gradients of all inputs are extracted
        into contiguous arrays once and contracted with np.tensordot, which
        avoids repeated per-element work for matrix-valued operations
        (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray of Obs
        A single Obs if func returns a scalar, otherwise an object array
        of Obs with the shape of func's return value.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                # Plain numbers are wrapped in zero-error Covobs so that the
                # uniform Obs machinery below can be used.
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect all covariance matrices and check that inputs sharing a covobs
    # name agree on the covariance matrix.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    # Union of all ensemble/covobs names appearing in the inputs.
    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # Propagate flags: an ensemble counts as merged/reweighted if any input is.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Central values of the inputs, preserving the shape of data.
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # multi == 1 when func returns an array (matrix mode).
    multi = int(isinstance(new_values, np.ndarray))

    # Per-ensemble replica mean values of the result and merged idl.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Fall back to the total mean for inputs not defined on this ensemble.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Mark as merged if the inputs' idl differ in length from the merged idl.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Determine the jacobian of func: manual > numerical > autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in for missing covobs: contributes a zero gradient.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        # Pre-extract deltas (per ensemble) and covobs gradients (per covobs)
        # into contiguous arrays so the assembly loop below can use tensordot.
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Assemble one Obs per element of the result.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            # Contract the jacobian with the pre-extracted deltas/gradients.
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Element-by-element linear error propagation.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    # Drop configs whose fluctuations vanished after merging.
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # Unwrap the 0-d object array for scalar results.
    if multi == 0:
        final_result = final_result.item()

    return final_result
1218
1219
1220def _reduce_deltas(deltas, idx_old, idx_new):
1221    """Extract deltas defined on idx_old on all configs of idx_new.
1222
1223    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1224    are ordered in an ascending order.
1225
1226    Parameters
1227    ----------
1228    deltas : list
1229        List of fluctuations
1230    idx_old : list
1231        List or range of configs on which the deltas are defined
1232    idx_new : list
1233        List of configs for which we want to extract the deltas.
1234        Has to be a subset of idx_old.
1235    """
1236    if not len(deltas) == len(idx_old):
1237        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1238    if type(idx_old) is range and type(idx_new) is range:
1239        if idx_old == idx_new:
1240            return deltas
1241    shape = len(idx_new)
1242    ret = np.zeros(shape)
1243    oldpos = 0
1244    for i in range(shape):
1245        pos = -1
1246        for j in range(oldpos, len(idx_old)):
1247            if idx_old[j] == idx_new[i]:
1248                pos = j
1249                break
1250        if pos < 0:
1251            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
1252        ret[i] = deltas[pos]
1253        oldpos = pos
1254    return np.array(ret)
1255
1256
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.
    """
    result = []
    for i, ob in enumerate(obs):
        # Covobs carry no per-config samples and cannot be reweighted.
        if len(ob.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(ob.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in ob.names:
            if not set(ob.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        sorted_names = sorted(ob.names)
        w_deltas = {}
        new_samples = []
        for name in sorted_names:
            # Restrict the weight to the configs of this observable.
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], ob.idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (ob.deltas[name] + ob.r_values[name]))
        tmp_obs = Obs(new_samples, sorted_names, idl=[ob.idl[name] for name in sorted_names])

        if kwargs.get('all_configs'):
            # Normalize by the weight on its full set of configs.
            new_weight = weight
        else:
            # Normalize by the weight restricted to this observable's configs.
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted_names], sorted_names, idl=[ob.idl[name] for name in sorted_names])

        reweighted_obs = derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)
        reweighted_obs.reweighted = True
        reweighted_obs.is_merged = ob.is_merged
        result.append(reweighted_obs)

    return result
1298
1299
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    sorted_names = sorted(obs_a.names)
    # Per-config product of the raw samples (fluctuations plus replica means).
    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in sorted_names]
    new_idl = [obs_a.idl[name] for name in sorted_names]

    o = Obs(new_samples, sorted_names, idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
1342
1343
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not any(o.cov_names for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle, then mirror it to obtain the symmetric matrix.
    cov = np.zeros((length, length))
    for i, obs_i in enumerate(obs):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs_i, obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    # Normalize to a correlation matrix.
    inv_err = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_err @ cov @ inv_err

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    # Rescale with the full gamma-method errors (including autocorrelation).
    err_diag = np.diag([o.dvalue for o in obs])
    cov = err_diag @ corr @ err_diag

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    return corr if correlation is True else cov
1409
1410
1411def _smooth_eigenvalues(corr, E):
1412    """Eigenvalue smoothing as described in hep-lat/9412087
1413
1414    corr : np.ndarray
1415        correlation matrix
1416    E : integer
1417        Number of eigenvalues to be left substantially unchanged
1418    """
1419    if not (2 < E < corr.shape[0] - 1):
1420        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1421    vals, vec = np.linalg.eigh(corr)
1422    lambda_min = np.mean(vals[:-E])
1423    vals[vals < lambda_min] = lambda_min
1424    vals /= np.mean(vals)
1425    return vec @ np.diag(vals) @ vec.T
1426
1427
def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        # Zero-lag cross-correlation of the two fluctuation series, both
        # mapped onto the merged index list new_idx by the module-level
        # helper _expand_deltas_for_merge (defined elsewhere in this file).
        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
        return np.sum(deltas1 * deltas2)

    # No shared ensemble/replica names -> the observables are uncorrelated.
    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    # e_dvalue only exists after gamma_method has been run.
    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        # Merged configuration indices for every replicum shared by both Obs.
        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        # Accumulate the unnormalized covariance over the shared replica.
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        # Normalization: the same correlator evaluated on unit fluctuations
        # counts the contributing configuration pairs.
        gamma_div = 0.0
        e_N = 0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
            e_N += len(idl_d[r_name])
        gamma /= max(gamma_div, 1.0)

        # Bias correction hep-lat/0306017 eq. (49)
        dvalue += (1 + 1 / e_N) * gamma / e_N

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        # Contribution of a shared Covobs: grad1^T @ cov @ grad2.
        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))

    return dvalue
1485
1486
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list or range, optional
        indices on which the reconstructed samples are defined.
    """
    n = len(jacks) - 1
    # Invert the jackknife projection: every original sample is a linear
    # combination of the N jackknife means.
    inversion = np.ones((n, n)) - (n - 1) * np.identity(n)
    samples = jacks[1:] @ inversion
    center = np.mean(samples)
    new_obs = Obs([samples - center], [name], idl=idl, means=[center])
    # Keep the exact mean that was supplied with the jackknife set.
    new_obs._value = jacks[0]
    return new_obs
1505
1506
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable defined on the union of all contained replica.

    Raises
    ------
    Exception
        If two input Obs share a replicum or if any input contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    # Idiomatic duplicate check instead of `(a == b) is False`.
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Reconstruct the raw samples (fluctuations + replica mean) per replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
    # The result counts as reweighted if any of the inputs was.
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o
1536
1537
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs for a scalar mean, otherwise one Obs per mean value.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        # Report the dimension of the covariance matrix. The previous message
        # interpolated ol[0].N, which is always 0 for an Obs that only wraps a
        # Covobs (no Monte Carlo samples), so the count shown was wrong.
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
class Obs:
 14class Obs:
 15    """Class for a general observable.
 16
 17    Instances of Obs are the basic objects of a pyerrors error analysis.
 18    They are initialized with a list which contains arrays of samples for
 19    different ensembles/replica and another list of same length which contains
 20    the names of the ensembles/replica. Mathematical operations can be
 21    performed on instances. The result is another instance of Obs. The error of
 22    an instance can be computed with the gamma_method. Also contains additional
 23    methods for output and visualization of the error calculation.
 24
 25    Attributes
 26    ----------
 27    S_global : float
 28        Standard value for S (default 2.0)
 29    S_dict : dict
 30        Dictionary for S values. If an entry for a given ensemble
 31        exists this overwrites the standard value for that ensemble.
 32    tau_exp_global : float
 33        Standard value for tau_exp (default 0.0)
 34    tau_exp_dict : dict
 35        Dictionary for tau_exp values. If an entry for a given ensemble exists
 36        this overwrites the standard value for that ensemble.
 37    N_sigma_global : float
 38        Standard value for N_sigma (default 1.0)
 39    N_sigma_dict : dict
 40        Dictionary for N_sigma values. If an entry for a given ensemble exists
 41        this overwrites the standard value for that ensemble.
 42    """
 43    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 44                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 45                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 46                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 47                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 48
 49    S_global = 2.0
 50    S_dict = {}
 51    tau_exp_global = 0.0
 52    tau_exp_dict = {}
 53    N_sigma_global = 1.0
 54    N_sigma_dict = {}
 55    filter_eps = 1e-10
 56
    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined

        Raises
        ------
        Exception
            If the lengths of samples, names and idl are inconsistent, names
            are not unique, or a sample has fewer than 5 entries.
        TypeError
            If any name is not a string.
        """

        # Validation is only performed on the public path; when explicit
        # means are passed (internal fast path) samples are taken as-is.
        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}       # name -> number of configurations
        self.r_values = {}    # name -> replica mean
        self.deltas = {}      # name -> fluctuations around the replica mean
        self._covobs = {}     # name -> embedded Covobs

        self._value = 0
        self.N = 0
        self.is_merged = {}
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        # Equidistant indices are stored compactly as a range.
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            # Default: configurations are numbered 1..len(sample).
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            # Fast path: samples are already fluctuations around the given means.
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            # Overall mean is the sample-size weighted average of replica means.
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None
138
    @property
    def value(self):
        """Mean value of the observable."""
        return self._value
142
    @property
    def dvalue(self):
        """Standard error of the observable (set by gamma_method)."""
        return self._dvalue
146
147    @property
148    def e_names(self):
149        return sorted(set([o.split('|')[0] for o in self.names]))
150
151    @property
152    def cov_names(self):
153        return sorted(set([o for o in self.covobs.keys()]))
154
155    @property
156    def mc_names(self):
157        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
158
159    @property
160    def e_content(self):
161        res = {}
162        for e, e_name in enumerate(self.e_names):
163            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
164            if e_name in self.names:
165                res[e_name].append(e_name)
166        return res
167
    @property
    def covobs(self):
        """Dictionary mapping name -> Covobs contained in this Obs."""
        return self._covobs
171
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        # Reset all per-ensemble results before (re)running the analysis.
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Resolve per-ensemble parameters: an explicit kwarg beats the
            # class-level dict entry, which beats the class-level global.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    # Irregular chains: effective length spans the full index range.
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Maximum window size: half of the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Accumulate the unnormalized autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same correlator evaluated on unit fluctuations
            # counts the number of contributing pairs per window size.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Error estimate for rho at lag i.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Stop once rho is compatible with zero within N_sigma
                    # or the window limit is reached.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S == 0: assume no autocorrelation, plain standard error.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Ensemble errors are added in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
333
    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.

        Returns
        -------
        numpy.ndarray
            Unnormalized autocorrelation function of length w_max.
        """
        gamma = np.zeros(w_max)
        # Map the deltas onto a contiguous index range via the module-level
        # helper _expand_deltas (defined elsewhere in this file; presumably
        # gap-filling for irregular chains — verify there).
        deltas = _expand_deltas(deltas, idx, shape)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            # Autocorrelation via |rfft|^2 of the zero-padded series.
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            # Direct O(N * w_max) evaluation of the lagged dot products.
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma
366
    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            # gamma_method has not been run yet -> only the mean is available.
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            for e_name in self.mc_names:
                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                # Report tau_int with the parameters used for its estimation.
                if self.tau_exp[e_name] > 0:
                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
                else:
                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += ' (irregular range)'
                    else:
                        # Several replica: list each one indented below the ensemble.
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += ' (irregular range)'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))
427
428    def is_zero_within_error(self, sigma=1):
429        """Checks whether the observable is zero within 'sigma' standard errors.
430
431        Parameters
432        ----------
433        sigma : int
434            Number of standard errors used for the check.
435
436        Works only properly when the gamma method was run.
437        """
438        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
439
440    def is_zero(self, atol=1e-10):
441        """Checks whether the observable is zero within a given tolerance.
442
443        Parameters
444        ----------
445        atol : float
446            Absolute tolerance (for details see numpy documentation).
447        """
448        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
449
    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            if set, saves the figure to a file named 'save' suffixed by
            the ensemble index (one file per ensemble).
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                # tau_exp analysis: draw the attached tail beyond the window
                # and the final tauint estimate with its error bar.
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            # tauint as a function of the window size, with the chosen window marked.
            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))
489
    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function time for each ensemble.

        Parameters
        ----------
        save : str
            if set, saves the figure to a file named 'save' suffixed by
            the ensemble index (one file per ensemble).
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            # Mark the summation window chosen by gamma_method.
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                # Straight line indicating the attached exponential tail.
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))
520
521    def plot_rep_dist(self):
522        """Plot replica distribution for each ensemble with more than one replicum."""
523        if not hasattr(self, 'e_dvalue'):
524            raise Exception('Run the gamma method first.')
525        for e, e_name in enumerate(self.mc_names):
526            if len(self.e_content[e_name]) == 1:
527                print('No replica distribution for a single replicum (', e_name, ')')
528                continue
529            r_length = []
530            sub_r_mean = 0
531            for r, r_name in enumerate(self.e_content[e_name]):
532                r_length.append(len(self.deltas[r_name]))
533                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
534            e_N = np.sum(r_length)
535            sub_r_mean /= e_N
536            arr = np.zeros(len(self.e_content[e_name]))
537            for r, r_name in enumerate(self.e_content[e_name]):
538                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
539            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
540            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
541            plt.draw()
542
    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                # Reconstruct the raw samples from fluctuations and replicum mean.
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    # Fill gaps of irregular chains so the x-axis reflects the
                    # actual Monte Carlo time (helper defined at module level).
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            # Statistics (skew/kurtosis) are always computed on the unexpanded samples.
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()
574
575    def plot_piechart(self, save=None):
576        """Plot piechart which shows the fractional contribution of each
577        ensemble to the error and returns a dictionary containing the fractions.
578
579        Parameters
580        ----------
581        save : str
582            saves the figure to a file named 'save' if.
583        """
584        if not hasattr(self, 'e_dvalue'):
585            raise Exception('Run the gamma method first.')
586        if np.isclose(0.0, self._dvalue, atol=1e-15):
587            raise Exception('Error is 0.0')
588        labels = self.e_names
589        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
590        fig1, ax1 = plt.subplots()
591        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
592        ax1.axis('equal')
593        plt.draw()
594        if save:
595            fig1.savefig(save)
596
597        return dict(zip(self.e_names, sizes))
598
599    def dump(self, filename, datatype="json.gz", description="", **kwargs):
600        """Dump the Obs to a file 'name' of chosen format.
601
602        Parameters
603        ----------
604        filename : str
605            name of the file to be saved.
606        datatype : str
607            Format of the exported file. Supported formats include
608            "json.gz" and "pickle"
609        description : str
610            Description for output file, only relevant for json.gz format.
611        path : str
612            specifies a custom path for the file (default '.')
613        """
614        if 'path' in kwargs:
615            file_name = kwargs.get('path') + '/' + filename
616        else:
617            file_name = filename
618
619        if datatype == "json.gz":
620            from .input.json import dump_to_json
621            dump_to_json([self], file_name, description=description)
622        elif datatype == "pickle":
623            with open(file_name + '.p', 'wb') as fb:
624                pickle.dump(self, fb)
625        else:
626            raise Exception("Unknown datatype " + str(datatype))
627
628    def export_jackknife(self):
629        """Export jackknife samples from the Obs
630
631        Returns
632        -------
633        numpy.ndarray
634            Returns a numpy array of length N + 1 where N is the number of samples
635            for the given ensemble and replicum. The zeroth entry of the array contains
636            the mean value of the Obs, entries 1 to N contain the N jackknife samples
637            derived from the Obs. The current implementation only works for observables
638            defined on exactly one ensemble and replicum. The derived jackknife samples
639            should agree with samples from a full jackknife analysis up to O(1/N).
640        """
641
642        if len(self.names) != 1:
643            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
644
645        name = self.names[0]
646        full_data = self.deltas[name] + self.r_values[name]
647        n = full_data.size
648        mean = self.value
649        tmp_jacks = np.zeros(n + 1)
650        tmp_jacks[0] = mean
651        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
652        return tmp_jacks
653
    def __float__(self):
        # Conversion to a plain float discards all error information.
        return float(self.value)
656
    def __repr__(self):
        # Reuse __str__, which renders the value with its error in parentheses.
        return 'Obs[' + str(self) + ']'
659
    def __str__(self):
        # No error available (e.g. gamma_method not run): plain value only.
        if self._dvalue == 0.0:
            return str(self.value)
        # Order of magnitude of the error fixes the number of digits shown.
        fexp = np.floor(np.log10(self._dvalue))
        if fexp < 0.0:
            # Error below 1: print two error digits in parentheses and the value
            # with a matching number of decimal places.
            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
        elif fexp == 0.0:
            # Error between 1 and 10: one decimal place for value and error.
            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
        else:
            # Error of 10 or larger: no decimal places.
            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
670
671    # Overload comparisons
    def __lt__(self, other):
        # Comparisons act on the mean value only; errors are ignored.
        return self.value < other
674
    def __le__(self, other):
        # Comparisons act on the mean value only; errors are ignored.
        return self.value <= other
677
    def __gt__(self, other):
        # Comparisons act on the mean value only; errors are ignored.
        return self.value > other
680
    def __ge__(self, other):
        # Comparisons act on the mean value only; errors are ignored.
        return self.value >= other
683
    def __eq__(self, other):
        # Equality holds if the difference vanishes within tolerance
        # (mean value and all fluctuations, see is_zero).
        return (self - other).is_zero()
686
    def __ne__(self, other):
        # Logical negation of __eq__.
        return not (self - other).is_zero()
689
690    # Overload math operations
691    def __add__(self, y):
692        if isinstance(y, Obs):
693            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
694        else:
695            if isinstance(y, np.ndarray):
696                return np.array([self + o for o in y])
697            elif y.__class__.__name__ in ['Corr', 'CObs']:
698                return NotImplemented
699            else:
700                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
701
702    def __radd__(self, y):
703        return self + y
704
705    def __mul__(self, y):
706        if isinstance(y, Obs):
707            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
708        else:
709            if isinstance(y, np.ndarray):
710                return np.array([self * o for o in y])
711            elif isinstance(y, complex):
712                return CObs(self * y.real, self * y.imag)
713            elif y.__class__.__name__ in ['Corr', 'CObs']:
714                return NotImplemented
715            else:
716                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
717
718    def __rmul__(self, y):
719        return self * y
720
721    def __sub__(self, y):
722        if isinstance(y, Obs):
723            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
724        else:
725            if isinstance(y, np.ndarray):
726                return np.array([self - o for o in y])
727            elif y.__class__.__name__ in ['Corr', 'CObs']:
728                return NotImplemented
729            else:
730                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
731
732    def __rsub__(self, y):
733        return -1 * (self - y)
734
    def __pos__(self):
        # Unary plus is a no-op and returns the identical object.
        return self
737
738    def __neg__(self):
739        return -1 * self
740
741    def __truediv__(self, y):
742        if isinstance(y, Obs):
743            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
744        else:
745            if isinstance(y, np.ndarray):
746                return np.array([self / o for o in y])
747            elif y.__class__.__name__ in ['Corr', 'CObs']:
748                return NotImplemented
749            else:
750                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
751
752    def __rtruediv__(self, y):
753        if isinstance(y, Obs):
754            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
755        else:
756            if isinstance(y, np.ndarray):
757                return np.array([o / self for o in y])
758            elif y.__class__.__name__ in ['Corr', 'CObs']:
759                return NotImplemented
760            else:
761                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
762
763    def __pow__(self, y):
764        if isinstance(y, Obs):
765            return derived_observable(lambda x: x[0] ** x[1], [self, y])
766        else:
767            return derived_observable(lambda x: x[0] ** y, [self])
768
769    def __rpow__(self, y):
770        if isinstance(y, Obs):
771            return derived_observable(lambda x: x[0] ** x[1], [y, self])
772        else:
773            return derived_observable(lambda x: y ** x[0], [self])
774
    def __abs__(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.abs(x[0]), [self])
777
778    # Overload numpy functions
    def sqrt(self):
        # Manual gradient: d/dx sqrt(x) = 1 / (2 sqrt(x)).
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
781
    def log(self):
        # Manual gradient: d/dx ln(x) = 1 / x.
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
784
    def exp(self):
        # Manual gradient: d/dx exp(x) = exp(x).
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
787
    def sin(self):
        # Manual gradient: d/dx sin(x) = cos(x).
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
790
    def cos(self):
        # Manual gradient: d/dx cos(x) = -sin(x).
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
793
    def tan(self):
        # Manual gradient: d/dx tan(x) = 1 / cos(x)^2.
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
796
    def arcsin(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
799
    def arccos(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arccos(x[0]), [self])
802
    def arctan(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arctan(x[0]), [self])
805
    def sinh(self):
        # Manual gradient: d/dx sinh(x) = cosh(x).
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
808
    def cosh(self):
        # Manual gradient: d/dx cosh(x) = sinh(x).
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
811
    def tanh(self):
        # Manual gradient: d/dx tanh(x) = 1 / cosh(x)^2.
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
814
    def arcsinh(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
817
    def arccosh(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
820
    def arctanh(self):
        # Gradient obtained via automatic differentiation (autograd's anp).
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        # Input validation is skipped on the internal fast path where means
        # are supplied explicitly via the 'means' keyword.
        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}      # name -> number of configurations
        self.r_values = {}   # name -> per-replicum mean
        self.deltas = {}     # name -> fluctuations around the replicum mean
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.is_merged = {}
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    # Normalize: an equally spaced list is stored as a compact range.
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            # Default index list: consecutive configurations starting at 1.
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            # Fast path: samples are already fluctuations, means are given.
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                # Overall mean is the sample-number weighted average of the replica means.
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
filter_eps = 1e-10
names
shape
r_values
deltas
N
is_merged
idl
ddvalue
reweighted
tag
value
dvalue
e_names
cov_names
mc_names
e_content
covobs
def gamma_method(self, **kwargs)
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        # Reset all per-ensemble results; repeated calls recompute from scratch.
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Resolve per-ensemble analysis parameters: explicit kwarg beats
            # the class-level *_dict entry, which beats the *_global default.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        # Autocorrelation analysis for every Monte Carlo ensemble.
        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    # Irregular chain: use the covered Monte Carlo time span.
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Accumulate the autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: number of contributing pairs per separation W.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Error of the normalized autocorrelation function at separation i.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                # Attach an exponential tail once rho is compatible with zero
                # within N_sigma standard deviations.
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S == 0: assume no autocorrelation, use the naive standard error.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Accumulate squared errors; total error is combined in quadrature below.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their covariance-based error without autocorrelation.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True)
    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            # gamma_method has not been run yet; only the mean is available.
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            for e_name in self.mc_names:
                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                if self.tau_exp[e_name] > 0:
                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
                else:
                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        # Single replicum: print its configuration range inline.
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += ' (irregular range)'
                    else:
                        # Multiple replica: one indented line per replicum.
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += ' (irregular range)'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def is_zero_within_error(self, sigma=1)
428    def is_zero_within_error(self, sigma=1):
429        """Checks whether the observable is zero within 'sigma' standard errors.
430
431        Parameters
432        ----------
433        sigma : int
434            Number of standard errors used for the check.
435
436        Works only properly when the gamma method was run.
437        """
438        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  Note: this check only gives meaningful results after the gamma method has been run.
def is_zero(self, atol=1e-10)
def is_zero(self, atol=1e-10):
    """Checks whether the observable is zero within a given tolerance.

    Parameters
    ----------
    atol : float
        Absolute tolerance (for details see numpy documentation).
    """
    # The central value, every ensemble's deltas and every covobs error
    # all have to vanish for the observable to count as zero.
    if not np.isclose(0.0, self.value, 1e-14, atol):
        return False
    if any(not np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()):
        return False
    return all(np.allclose(0.0, cov.errsq(), 1e-14, atol) for cov in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None)
def plot_tauint(self, save=None):
    """Plot integrated autocorrelation time for each ensemble.

    Requires a prior run of the gamma method (raises otherwise).

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')

    # One figure per Monte Carlo ensemble.
    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel(r'$W$')
        plt.ylabel(r'$\tau_\mathrm{int}$')
        length = int(len(self.e_n_tauint[e_name]))
        if self.tau_exp[e_name] > 0:
            # tau_exp mode: visualize the tail attached beyond the summation
            # window, anchored at tauint at the window size.
            # NOTE(review): tail estimate built from rho at W+1 -- confirm
            # formula against the gamma-method implementation.
            base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
            x_help = np.arange(2 * self.tau_exp[e_name])
            y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
            x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
            plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
            # Final tauint estimate with its error at the end of the tail.
            plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                         yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
        else:
            # Automatic windowing mode: label with the S parameter instead.
            label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

        # Cumulative tauint(W) with errors, truncated to the plot range.
        plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
        # Mark the chosen summation window.
        plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
        plt.legend()
        plt.xlim(-0.5, xmax)
        ylim = plt.ylim()
        plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
        plt.draw()
        if save:
            # One file per ensemble, suffixed with the ensemble index.
            fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def plot_rho(self, save=None)
def plot_rho(self, save=None):
    """Plot normalized autocorrelation function time for each ensemble.

    Requires a prior run of the gamma method (raises otherwise).

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    # One figure per Monte Carlo ensemble.
    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel('W')
        plt.ylabel('rho')
        length = int(len(self.e_drho[e_name]))
        plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
        # Mark the chosen summation window.
        plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
        if self.tau_exp[e_name] > 0:
            # Indicate the linear tail from rho(W+1) down to zero that is
            # attached beyond the window in tau_exp mode.
            plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                     [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
        else:
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
            plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
        # Zero line for visual reference.
        plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
        plt.xlim(-0.5, xmax)
        plt.draw()
        if save:
            # One file per ensemble, suffixed with the ensemble index.
            fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def plot_rep_dist(self)
def plot_rep_dist(self):
    """Plot replica distribution for each ensemble with more than one replicum.

    Shows a histogram of the deviation of each replicum mean from the
    ensemble mean, normalized such that the expected distribution has
    mean 0 and variance 1. Requires a prior run of the gamma method.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e, e_name in enumerate(self.mc_names):
        if len(self.e_content[e_name]) == 1:
            print('No replica distribution for a single replicum (', e_name, ')')
            continue
        r_length = []
        sub_r_mean = 0
        for r, r_name in enumerate(self.e_content[e_name]):
            r_length.append(len(self.deltas[r_name]))
            sub_r_mean += self.shape[r_name] * self.r_values[r_name]
        e_N = np.sum(r_length)
        # Configuration-weighted mean over all replica of this ensemble.
        sub_r_mean /= e_N
        arr = np.zeros(len(self.e_content[e_name]))
        for r, r_name in enumerate(self.e_content[e_name]):
            # Normalize each replicum deviation by its expected spread.
            arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
        plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
        # Bug fix: add the missing space between 'distribution' and the
        # ensemble name in the plot title.
        plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
        plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True)
def plot_history(self, expand=True):
    """Plot derived Monte Carlo history for each ensemble

    Parameters
    ----------
    expand : bool
        show expanded history for irregular Monte Carlo chains (default: True).
    """
    # One figure per Monte Carlo ensemble; replica are concatenated on the x axis.
    for e, e_name in enumerate(self.mc_names):
        plt.figure()
        r_length = []
        tmp = []           # per-replicum samples on the recorded configurations only
        tmp_expanded = []  # per-replicum samples expanded onto a contiguous index range
        for r, r_name in enumerate(self.e_content[e_name]):
            tmp.append(self.deltas[r_name] + self.r_values[r_name])
            if expand:
                # _expand_deltas fills the gaps of an irregular chain so the
                # x axis is contiguous. NOTE(review): presumably pads the
                # missing configurations with zeros -- confirm in the helper.
                tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
                r_length.append(len(tmp_expanded[-1]))
            else:
                r_length.append(len(tmp[-1]))
        e_N = np.sum(r_length)
        x = np.arange(e_N)
        # Skewness/kurtosis statistics are always computed on the
        # unexpanded data, regardless of the 'expand' flag.
        y_test = np.concatenate(tmp, axis=0)
        if expand:
            y = np.concatenate(tmp_expanded, axis=0)
        else:
            y = y_test
        plt.errorbar(x, y, fmt='.', markersize=3)
        plt.xlim(-0.5, e_N - 0.5)
        plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
        plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None)
def plot_piechart(self, save=None):
    """Plot piechart which shows the fractional contribution of each
    ensemble to the error and returns a dictionary containing the fractions.

    Requires a prior run of the gamma method and a non-zero total error.

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    if np.isclose(0.0, self._dvalue, atol=1e-15):
        raise Exception('Error is 0.0')
    labels = self.e_names
    # Fractional contribution of each ensemble's squared error to the total
    # squared error. np.asarray makes the element-wise division well defined
    # even if _dvalue is a plain Python float (a bare list / float would fail).
    sizes = np.asarray([self.e_dvalue[name] ** 2 for name in labels]) / self._dvalue ** 2
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
    ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.draw()
    if save:
        fig1.savefig(save)

    return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def dump(self, filename, datatype='json.gz', description='', **kwargs)
def dump(self, filename, datatype="json.gz", description="", **kwargs):
    """Dump the Obs to a file 'name' of chosen format.

    Parameters
    ----------
    filename : str
        name of the file to be saved.
    datatype : str
        Format of the exported file. Supported formats include
        "json.gz" and "pickle"
    description : str
        Description for output file, only relevant for json.gz format.
    path : str
        specifies a custom path for the file (default '.')
    """
    # Prepend the optional output directory.
    if 'path' in kwargs:
        file_name = '/'.join([kwargs['path'], filename])
    else:
        file_name = filename

    if datatype == "pickle":
        with open(file_name + '.p', 'wb') as handle:
            pickle.dump(self, handle)
    elif datatype == "json.gz":
        from .input.json import dump_to_json
        dump_to_json([self], file_name, description=description)
    else:
        raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self)
def export_jackknife(self):
    """Export jackknife samples from the Obs

    Returns
    -------
    numpy.ndarray
        Returns a numpy array of length N + 1 where N is the number of samples
        for the given ensemble and replicum. The zeroth entry of the array contains
        the mean value of the Obs, entries 1 to N contain the N jackknife samples
        derived from the Obs. The current implementation only works for observables
        defined on exactly one ensemble and replicum. The derived jackknife samples
        should agree with samples from a full jackknife analysis up to O(1/N).
    """

    if len(self.names) != 1:
        raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

    name = self.names[0]
    # Reconstruct the original samples from deltas and replicum mean.
    full_data = self.deltas[name] + self.r_values[name]
    n = full_data.size
    mean = self.value
    # Leave-one-out means: jack_i = (n * mean - x_i) / (n - 1),
    # prefixed with the mean itself.
    return np.concatenate(([mean], (n * mean - full_data) / (n - 1)))

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self)
def sqrt(self):
    """Square root of this Obs with error propagation."""
    grad = 1 / (2 * np.sqrt(self.value))
    return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[grad])
def log(self)
def log(self):
    """Natural logarithm of this Obs with error propagation."""
    grad = 1 / self.value
    return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[grad])
def exp(self)
def exp(self):
    """Exponential of this Obs with error propagation."""
    grad = np.exp(self.value)
    return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[grad])
def sin(self)
def sin(self):
    """Sine of this Obs with error propagation."""
    grad = np.cos(self.value)
    return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[grad])
def cos(self)
def cos(self):
    """Cosine of this Obs with error propagation."""
    grad = -np.sin(self.value)
    return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[grad])
def tan(self)
def tan(self):
    """Tangent of this Obs with error propagation."""
    grad = 1 / np.cos(self.value) ** 2
    return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[grad])
def arcsin(self)
def arcsin(self):
    """Inverse sine of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arcsin(v[0]), [self])
def arccos(self)
def arccos(self):
    """Inverse cosine of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arccos(v[0]), [self])
def arctan(self)
def arctan(self):
    """Inverse tangent of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arctan(v[0]), [self])
def sinh(self)
def sinh(self):
    """Hyperbolic sine of this Obs with error propagation."""
    grad = np.cosh(self.value)
    return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[grad])
def cosh(self)
def cosh(self):
    """Hyperbolic cosine of this Obs with error propagation."""
    grad = np.sinh(self.value)
    return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[grad])
def tanh(self)
def tanh(self):
    """Hyperbolic tangent of this Obs with error propagation."""
    grad = 1 / np.cosh(self.value) ** 2
    return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[grad])
def arcsinh(self)
def arcsinh(self):
    """Inverse hyperbolic sine of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arcsinh(v[0]), [self])
def arccosh(self)
def arccosh(self):
    """Inverse hyperbolic cosine of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arccosh(v[0]), [self])
def arctanh(self)
def arctanh(self):
    """Inverse hyperbolic tangent of this Obs; gradient via automatic differentiation."""
    return derived_observable(lambda v: anp.arctanh(v[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        # real/imag may be Obs instances or plain numbers (see gamma_method).
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        # Read-only access to the real part.
        return self._real

    @property
    def imag(self):
        # Read-only access to the imaginary part.
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Return the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        """Complex addition; ndarrays are deferred to numpy broadcasting."""
        if isinstance(other, np.ndarray):
            # other + self dispatches element-wise back into CObs arithmetic.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            # other is treated as a purely real quantity.
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        """Complex subtraction; ndarrays are deferred to numpy broadcasting."""
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        """Complex multiplication."""
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # Both operands are fully Obs-valued: supply the analytic
                # jacobian of the complex product by hand via man_grad.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # other is effectively real-valued.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        """Complex division."""
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # Multiply by the conjugate and divide by the squared modulus r.
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # Computes other / self.
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        # Modulus of the complex observable.
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # NOTE(review): assumes other exposes .real/.imag; comparison with
        # arbitrary objects raises AttributeError. Defining __eq__ without
        # __hash__ also makes CObs instances unhashable.
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # Insert a '+' only when the imaginary part is non-negative.
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
829    def __init__(self, real, imag=0.0):
830        self._real = real
831        self._imag = imag
832        self.tag = None
tag
real
imag
def gamma_method(self, **kwargs)
842    def gamma_method(self, **kwargs):
843        """Executes the gamma_method for the real and the imaginary part."""
844        if isinstance(self.real, Obs):
845            self.real.gamma_method(**kwargs)
846        if isinstance(self.imag, Obs):
847            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self)
849    def is_zero(self):
850        """Checks whether both real and imaginary part are zero within machine precision."""
851        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self)
853    def conjugate(self):
854        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs)
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, deltas and covobs gradients are pre-extracted into arrays
        and contracted with np.tensordot (intended as an optimization for
        array-valued data; should agree with the default path).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    # Work on a flat view so scalars and arrays of Obs are handled uniformly.
    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect covariance matrices of all covobs and check that observables
    # sharing a covobs name also share an identical covariance matrix.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # Propagate flags: the result is flagged merged/reweighted if any input is.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # multi == 1 when func returns an array (array-valued derived observable).
    multi = int(isinstance(new_values, np.ndarray))

    # Re-evaluate func on the per-ensemble replica means to obtain the
    # replica means of the derived quantity; merge the idl of all inputs.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Inputs not defined on this ensemble contribute their central value.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Differing idl lengths after the merge imply a merged ensemble.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Obtain the jacobian of func: manually supplied, numerical, or autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        # Allow the caller to override the numdifftools step parameters.
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Placeholder with a zero gradient for observables that do not
        # depend on a given covobs.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        # Pre-extract deltas and covobs gradients into dense arrays so the
        # propagation loop below reduces to tensordot contractions.
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            # Contract the jacobian with the pre-extracted deltas/gradients.
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Default path: linear error propagation observable by observable.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    # _filter_zeroes drops the padding introduced by idl merging.
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        # Scalar input: unwrap the 0-d object array to a plain Obs.
        final_result = final_result.item()

    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs)
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.
    """
    result = []
    for i, ob in enumerate(obs):
        # Reweighting is only defined for purely MC-based observables whose
        # configurations are all covered by the weight.
        if len(ob.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(ob.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in ob.names:
            if not set(ob.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        sorted_names = sorted(ob.names)
        w_deltas = {}
        new_samples = []
        for name in sorted_names:
            # Restrict the weight to the configurations of this observable
            # and form the weighted samples w * x per configuration.
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], ob.idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (ob.deltas[name] + ob.r_values[name]))
        tmp_obs = Obs(new_samples, sorted_names, idl=[ob.idl[name] for name in sorted_names])

        # Normalization: either the full weight or the weight restricted to
        # the configurations of this observable.
        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted_names], sorted_names, idl=[ob.idl[name] for name in sorted_names])

        ratio = derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)
        ratio.reweighted = True
        ratio.is_merged = ob.is_merged
        result.append(ratio)

    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
def correlate(obs_a, obs_b)
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # Samples have to live on identical configurations for an element-wise product.
        # Use %-formatting so the exception carries a readable message instead of a
        # tuple of arguments, which is what Exception(a, b, c) would render as.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble %s do not fit' % (name))
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble %s do not fit' % (name))

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        # Reconstruct the raw samples (deltas + replica mean) and multiply them.
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs)
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    # Warn about rank deficiency unless covobs contribute extra information.
    largest_sample_count = np.max([o.N for o in obs])
    has_covobs = [item for sublist in [o.cov_names for o in obs] for item in sublist]
    if largest_sample_count <= dim and not has_covobs:
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({largest_sample_count}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle element-wise, then mirror it to the lower one.
    upper = np.zeros((dim, dim))
    for i, j in zip(*np.triu_indices(dim)):
        upper[i, j] = _covariance_element(obs[i], obs[j])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalize to a correlation matrix.
    inv_std = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_std @ cov @ inv_std

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    # Rescale with the full gamma-method errors (including autocorrelation).
    err_scale = np.diag([o.dvalue for o in obs])
    cov = err_scale @ corr @ err_scale

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    return corr if correlation is True else cov

Calculates the error covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None)
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list or range, optional
        configuration numbers the reconstructed samples belong to,
        passed through to the Obs constructor (default None).
    """
    jacks = np.asarray(jacks)  # accept any array-like, e.g. a plain list
    length = len(jacks) - 1
    # Linear map that inverts the jackknife resampling: recovers the
    # underlying per-configuration samples from the leave-one-out means.
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    # Keep the exact input mean rather than the reconstructed one.
    new_obs._value = jacks[0]
    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs)
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    # A replicum appearing twice would be double counted.
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Reconstruct raw samples (deltas + replica mean) per replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: any(oi.is_merged.get(name, False) for oi in list_of_obs) for name in o.names}
    # Use builtin any() so reweighted stays a Python bool; np.max would yield a
    # numpy.bool_, which fails the `reweighted is True` identity checks used elsewhere.
    o.reweighted = any(oi.reweighted for oi in list_of_obs)
    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None)
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    # The required number of means is the dimension of the covariance matrix.
    # Report that dimension in the message (the original printed ol[0].N, which
    # counts Monte Carlo samples and is not the quantity being checked).
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.