pyerrors.obs

View Source
   0import warnings
   1import pickle
   2import numpy as np
   3import autograd.numpy as anp  # Thinly-wrapped numpy
   4from autograd import jacobian
   5import matplotlib.pyplot as plt
   6from scipy.stats import skew, skewtest, kurtosis, kurtosistest
   7import numdifftools as nd
   8from itertools import groupby
   9from .covobs import Covobs
  10
  11
  12class Obs:
  13    """Class for a general observable.
  14
  15    Instances of Obs are the basic objects of a pyerrors error analysis.
  16    They are initialized with a list which contains arrays of samples for
  17    different ensembles/replica and another list of same length which contains
  18    the names of the ensembles/replica. Mathematical operations can be
  19    performed on instances. The result is another instance of Obs. The error of
  20    an instance can be computed with the gamma_method. Also contains additional
  21    methods for output and visualization of the error calculation.
  22
  23    Attributes
  24    ----------
  25    S_global : float
  26        Standard value for S (default 2.0)
  27    S_dict : dict
  28        Dictionary for S values. If an entry for a given ensemble
  29        exists this overwrites the standard value for that ensemble.
  30    tau_exp_global : float
  31        Standard value for tau_exp (default 0.0)
  32    tau_exp_dict : dict
  33        Dictionary for tau_exp values. If an entry for a given ensemble exists
  34        this overwrites the standard value for that ensemble.
  35    N_sigma_global : float
  36        Standard value for N_sigma (default 1.0)
  37    N_sigma_dict : dict
  38        Dictionary for N_sigma values. If an entry for a given ensemble exists
  39        this overwrites the standard value for that ensemble.
  40    """
  41    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  42                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  43                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  44                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  45                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  46
  47    S_global = 2.0
  48    S_dict = {}
  49    tau_exp_global = 0.0
  50    tau_exp_dict = {}
  51    N_sigma_global = 1.0
  52    N_sigma_dict = {}
  53    filter_eps = 1e-10
  54
  55    def __init__(self, samples, names, idl=None, **kwargs):
  56        """ Initialize Obs object.
  57
  58        Parameters
  59        ----------
  60        samples : list
  61            list of numpy arrays containing the Monte Carlo samples
  62        names : list
  63            list of strings labeling the individual samples
  64        idl : list, optional
  65            list of ranges or lists on which the samples are defined
  66        """
  67
  68        if kwargs.get("means") is None and len(samples):
  69            if len(samples) != len(names):
  70                raise Exception('Length of samples and names incompatible.')
  71            if idl is not None:
  72                if len(idl) != len(names):
  73                    raise Exception('Length of idl incompatible with samples and names.')
  74            name_length = len(names)
  75            if name_length > 1:
  76                if name_length != len(set(names)):
  77                    raise Exception('names are not unique.')
  78                if not all(isinstance(x, str) for x in names):
  79                    raise TypeError('All names have to be strings.')
  80            else:
  81                if not isinstance(names[0], str):
  82                    raise TypeError('All names have to be strings.')
  83            if min(len(x) for x in samples) <= 4:
  84                raise Exception('Samples have to have at least 5 entries.')
  85
  86        self.names = sorted(names)
  87        self.shape = {}
  88        self.r_values = {}
  89        self.deltas = {}
  90        self._covobs = {}
  91
  92        self._value = 0
  93        self.N = 0
  94        self.is_merged = {}
  95        self.idl = {}
  96        if idl is not None:
  97            for name, idx in sorted(zip(names, idl)):
  98                if isinstance(idx, range):
  99                    self.idl[name] = idx
 100                elif isinstance(idx, (list, np.ndarray)):
 101                    dc = np.unique(np.diff(idx))
 102                    if np.any(dc < 0):
 103                        raise Exception("Unsorted idx for idl[%s]" % (name))
 104                    if len(dc) == 1:
 105                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 106                    else:
 107                        self.idl[name] = list(idx)
 108                else:
 109                    raise Exception('incompatible type for idl[%s].' % (name))
 110        else:
 111            for name, sample in sorted(zip(names, samples)):
 112                self.idl[name] = range(1, len(sample) + 1)
 113
 114        if kwargs.get("means") is not None:
 115            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 116                self.shape[name] = len(self.idl[name])
 117                self.N += self.shape[name]
 118                self.r_values[name] = mean
 119                self.deltas[name] = sample
 120        else:
 121            for name, sample in sorted(zip(names, samples)):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                if len(sample) != self.shape[name]:
 125                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 126                self.r_values[name] = np.mean(sample)
 127                self.deltas[name] = sample - self.r_values[name]
 128                self._value += self.shape[name] * self.r_values[name]
 129            self._value /= self.N
 130
 131        self._dvalue = 0.0
 132        self.ddvalue = 0.0
 133        self.reweighted = False
 134
 135        self.tag = None
 136
 137    @property
 138    def value(self):
 139        return self._value
 140
 141    @property
 142    def dvalue(self):
 143        return self._dvalue
 144
 145    @property
 146    def e_names(self):
 147        return sorted(set([o.split('|')[0] for o in self.names]))
 148
 149    @property
 150    def cov_names(self):
 151        return sorted(set([o for o in self.covobs.keys()]))
 152
 153    @property
 154    def mc_names(self):
 155        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 156
 157    @property
 158    def e_content(self):
 159        res = {}
 160        for e, e_name in enumerate(self.e_names):
 161            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 162            if e_name in self.names:
 163                res[e_name].append(e_name)
 164        return res
 165
 166    @property
 167    def covobs(self):
 168        return self._covobs
 169
 170    def gamma_method(self, **kwargs):
 171        """Estimate the error and related properties of the Obs.
 172
 173        Parameters
 174        ----------
 175        S : float
 176            specifies a custom value for the parameter S (default 2.0).
 177            If set to 0 it is assumed that the data exhibits no
 178            autocorrelation. In this case the error estimates coincides
 179            with the sample standard error.
 180        tau_exp : float
 181            positive value triggers the critical slowing down analysis
 182            (default 0.0).
 183        N_sigma : float
 184            number of standard deviations from zero until the tail is
 185            attached to the autocorrelation function (default 1).
 186        fft : bool
 187            determines whether the fft algorithm is used for the computation
 188            of the autocorrelation function (default True)
 189        """
 190
 191        e_content = self.e_content
 192        self.e_dvalue = {}
 193        self.e_ddvalue = {}
 194        self.e_tauint = {}
 195        self.e_dtauint = {}
 196        self.e_windowsize = {}
 197        self.e_n_tauint = {}
 198        self.e_n_dtauint = {}
 199        e_gamma = {}
 200        self.e_rho = {}
 201        self.e_drho = {}
 202        self._dvalue = 0
 203        self.ddvalue = 0
 204
 205        self.S = {}
 206        self.tau_exp = {}
 207        self.N_sigma = {}
 208
 209        if kwargs.get('fft') is False:
 210            fft = False
 211        else:
 212            fft = True
 213
 214        def _parse_kwarg(kwarg_name):
 215            if kwarg_name in kwargs:
 216                tmp = kwargs.get(kwarg_name)
 217                if isinstance(tmp, (int, float)):
 218                    if tmp < 0:
 219                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 220                    for e, e_name in enumerate(self.e_names):
 221                        getattr(self, kwarg_name)[e_name] = tmp
 222                else:
 223                    raise TypeError(kwarg_name + ' is not in proper format.')
 224            else:
 225                for e, e_name in enumerate(self.e_names):
 226                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 227                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 228                    else:
 229                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 230
 231        _parse_kwarg('S')
 232        _parse_kwarg('tau_exp')
 233        _parse_kwarg('N_sigma')
 234
 235        for e, e_name in enumerate(self.mc_names):
 236            r_length = []
 237            for r_name in e_content[e_name]:
 238                if isinstance(self.idl[r_name], range):
 239                    r_length.append(len(self.idl[r_name]))
 240                else:
 241                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 242
 243            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 244            w_max = max(r_length) // 2
 245            e_gamma[e_name] = np.zeros(w_max)
 246            self.e_rho[e_name] = np.zeros(w_max)
 247            self.e_drho[e_name] = np.zeros(w_max)
 248
 249            for r_name in e_content[e_name]:
 250                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 251
 252            gamma_div = np.zeros(w_max)
 253            for r_name in e_content[e_name]:
 254                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 255            gamma_div[gamma_div < 1] = 1.0
 256            e_gamma[e_name] /= gamma_div[:w_max]
 257
 258            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 259                self.e_tauint[e_name] = 0.5
 260                self.e_dtauint[e_name] = 0.0
 261                self.e_dvalue[e_name] = 0.0
 262                self.e_ddvalue[e_name] = 0.0
 263                self.e_windowsize[e_name] = 0
 264                continue
 265
 266            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 267            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 268            # Make sure no entry of tauint is smaller than 0.5
 269            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 270            # hep-lat/0306017 eq. (42)
 271            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 272            self.e_n_dtauint[e_name][0] = 0.0
 273
 274            def _compute_drho(i):
 275                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 276                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 277
 278            _compute_drho(1)
 279            if self.tau_exp[e_name] > 0:
 280                texp = self.tau_exp[e_name]
 281                # Critical slowing down analysis
 282                if w_max // 2 <= 1:
 283                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 284                for n in range(1, w_max // 2):
 285                    _compute_drho(n + 1)
 286                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 287                        # Bias correction hep-lat/0306017 eq. (49) included
 288                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 289                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 290                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 291                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 292                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 293                        self.e_windowsize[e_name] = n
 294                        break
 295            else:
 296                if self.S[e_name] == 0.0:
 297                    self.e_tauint[e_name] = 0.5
 298                    self.e_dtauint[e_name] = 0.0
 299                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 300                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 301                    self.e_windowsize[e_name] = 0
 302                else:
 303                    # Standard automatic windowing procedure
 304                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 305                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
 306                    for n in range(1, w_max):
 307                        if n < w_max // 2 - 2:
 308                            _compute_drho(n + 1)
 309                        if g_w[n - 1] < 0 or n >= w_max - 1:
 310                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 311                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 312                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 313                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 314                            self.e_windowsize[e_name] = n
 315                            break
 316
 317            self._dvalue += self.e_dvalue[e_name] ** 2
 318            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 319
 320        for e_name in self.cov_names:
 321            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 322            self.e_ddvalue[e_name] = 0
 323            self._dvalue += self.e_dvalue[e_name]**2
 324
 325        self._dvalue = np.sqrt(self._dvalue)
 326        if self._dvalue == 0.0:
 327            self.ddvalue = 0.0
 328        else:
 329            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 330        return
 331
 332    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 333        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 334           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 335
 336        Parameters
 337        ----------
 338        deltas : list
 339            List of fluctuations
 340        idx : list
 341            List or range of configurations on which the deltas are defined.
 342        shape : int
 343            Number of configurations in idx.
 344        w_max : int
 345            Upper bound for the summation window.
 346        fft : bool
 347            determines whether the fft algorithm is used for the computation
 348            of the autocorrelation function.
 349        """
 350        gamma = np.zeros(w_max)
 351        deltas = _expand_deltas(deltas, idx, shape)
 352        new_shape = len(deltas)
 353        if fft:
 354            max_gamma = min(new_shape, w_max)
 355            # The padding for the fft has to be even
 356            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 357            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 358        else:
 359            for n in range(w_max):
 360                if new_shape - n >= 0:
 361                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 362
 363        return gamma
 364
 365    def details(self, ens_content=True):
 366        """Output detailed properties of the Obs.
 367
 368        Parameters
 369        ----------
 370        ens_content : bool
 371            print details about the ensembles and replica if true.
 372        """
 373        if self.tag is not None:
 374            print("Description:", self.tag)
 375        if not hasattr(self, 'e_dvalue'):
 376            print('Result\t %3.8e' % (self.value))
 377        else:
 378            if self.value == 0.0:
 379                percentage = np.nan
 380            else:
 381                percentage = np.abs(self._dvalue / self.value) * 100
 382            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 383            if len(self.e_names) > 1:
 384                print(' Ensemble errors:')
 385            for e_name in self.mc_names:
 386                if len(self.e_names) > 1:
 387                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 388                if self.tau_exp[e_name] > 0:
 389                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
 390                else:
 391                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
 392            for e_name in self.cov_names:
 393                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 394        if ens_content is True:
 395            if len(self.e_names) == 1:
 396                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 397            else:
 398                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 399            my_string_list = []
 400            for key, value in sorted(self.e_content.items()):
 401                if key not in self.covobs:
 402                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 403                    if len(value) == 1:
 404                        my_string += f': {self.shape[value[0]]} configurations'
 405                        if isinstance(self.idl[value[0]], range):
 406                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 407                        else:
 408                            my_string += ' (irregular range)'
 409                    else:
 410                        sublist = []
 411                        for v in value:
 412                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 413                            my_substring += f': {self.shape[v]} configurations'
 414                            if isinstance(self.idl[v], range):
 415                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 416                            else:
 417                                my_substring += ' (irregular range)'
 418                            sublist.append(my_substring)
 419
 420                        my_string += '\n' + '\n'.join(sublist)
 421                else:
 422                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 423                my_string_list.append(my_string)
 424            print('\n'.join(my_string_list))
 425
 426    def is_zero_within_error(self, sigma=1):
 427        """Checks whether the observable is zero within 'sigma' standard errors.
 428
 429        Parameters
 430        ----------
 431        sigma : int
 432            Number of standard errors used for the check.
 433
 434        Works only properly when the gamma method was run.
 435        """
 436        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 437
 438    def is_zero(self, atol=1e-10):
 439        """Checks whether the observable is zero within a given tolerance.
 440
 441        Parameters
 442        ----------
 443        atol : float
 444            Absolute tolerance (for details see numpy documentation).
 445        """
 446        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 447
 448    def plot_tauint(self, save=None):
 449        """Plot integrated autocorrelation time for each ensemble.
 450
 451        Parameters
 452        ----------
 453        save : str
 454            saves the figure to a file named 'save' if.
 455        """
 456        if not hasattr(self, 'e_dvalue'):
 457            raise Exception('Run the gamma method first.')
 458
 459        for e, e_name in enumerate(self.mc_names):
 460            fig = plt.figure()
 461            plt.xlabel(r'$W$')
 462            plt.ylabel(r'$\tau_\mathrm{int}$')
 463            length = int(len(self.e_n_tauint[e_name]))
 464            if self.tau_exp[e_name] > 0:
 465                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 466                x_help = np.arange(2 * self.tau_exp[e_name])
 467                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 468                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 469                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 470                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 471                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 472                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 473                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 474            else:
 475                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 476                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 477
 478            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 479            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 480            plt.legend()
 481            plt.xlim(-0.5, xmax)
 482            ylim = plt.ylim()
 483            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 484            plt.draw()
 485            if save:
 486                fig.savefig(save + "_" + str(e))
 487
 488    def plot_rho(self, save=None):
 489        """Plot normalized autocorrelation function time for each ensemble.
 490
 491        Parameters
 492        ----------
 493        save : str
 494            saves the figure to a file named 'save' if.
 495        """
 496        if not hasattr(self, 'e_dvalue'):
 497            raise Exception('Run the gamma method first.')
 498        for e, e_name in enumerate(self.mc_names):
 499            fig = plt.figure()
 500            plt.xlabel('W')
 501            plt.ylabel('rho')
 502            length = int(len(self.e_drho[e_name]))
 503            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 504            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 505            if self.tau_exp[e_name] > 0:
 506                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 507                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 509                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 510            else:
 511                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 512                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 513            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 514            plt.xlim(-0.5, xmax)
 515            plt.draw()
 516            if save:
 517                fig.savefig(save + "_" + str(e))
 518
 519    def plot_rep_dist(self):
 520        """Plot replica distribution for each ensemble with more than one replicum."""
 521        if not hasattr(self, 'e_dvalue'):
 522            raise Exception('Run the gamma method first.')
 523        for e, e_name in enumerate(self.mc_names):
 524            if len(self.e_content[e_name]) == 1:
 525                print('No replica distribution for a single replicum (', e_name, ')')
 526                continue
 527            r_length = []
 528            sub_r_mean = 0
 529            for r, r_name in enumerate(self.e_content[e_name]):
 530                r_length.append(len(self.deltas[r_name]))
 531                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 532            e_N = np.sum(r_length)
 533            sub_r_mean /= e_N
 534            arr = np.zeros(len(self.e_content[e_name]))
 535            for r, r_name in enumerate(self.e_content[e_name]):
 536                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 537            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 538            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 539            plt.draw()
 540
 541    def plot_history(self, expand=True):
 542        """Plot derived Monte Carlo history for each ensemble
 543
 544        Parameters
 545        ----------
 546        expand : bool
 547            show expanded history for irregular Monte Carlo chains (default: True).
 548        """
 549        for e, e_name in enumerate(self.mc_names):
 550            plt.figure()
 551            r_length = []
 552            tmp = []
 553            tmp_expanded = []
 554            for r, r_name in enumerate(self.e_content[e_name]):
 555                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 556                if expand:
 557                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 558                    r_length.append(len(tmp_expanded[-1]))
 559                else:
 560                    r_length.append(len(tmp[-1]))
 561            e_N = np.sum(r_length)
 562            x = np.arange(e_N)
 563            y_test = np.concatenate(tmp, axis=0)
 564            if expand:
 565                y = np.concatenate(tmp_expanded, axis=0)
 566            else:
 567                y = y_test
 568            plt.errorbar(x, y, fmt='.', markersize=3)
 569            plt.xlim(-0.5, e_N - 0.5)
 570            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 571            plt.draw()
 572
 573    def plot_piechart(self):
 574        """Plot piechart which shows the fractional contribution of each
 575        ensemble to the error and returns a dictionary containing the fractions."""
 576        if not hasattr(self, 'e_dvalue'):
 577            raise Exception('Run the gamma method first.')
 578        if np.isclose(0.0, self._dvalue, atol=1e-15):
 579            raise Exception('Error is 0.0')
 580        labels = self.e_names
 581        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 582        fig1, ax1 = plt.subplots()
 583        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 584        ax1.axis('equal')
 585        plt.draw()
 586
 587        return dict(zip(self.e_names, sizes))
 588
 589    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 590        """Dump the Obs to a file 'name' of chosen format.
 591
 592        Parameters
 593        ----------
 594        filename : str
 595            name of the file to be saved.
 596        datatype : str
 597            Format of the exported file. Supported formats include
 598            "json.gz" and "pickle"
 599        description : str
 600            Description for output file, only relevant for json.gz format.
 601        path : str
 602            specifies a custom path for the file (default '.')
 603        """
 604        if 'path' in kwargs:
 605            file_name = kwargs.get('path') + '/' + filename
 606        else:
 607            file_name = filename
 608
 609        if datatype == "json.gz":
 610            from .input.json import dump_to_json
 611            dump_to_json([self], file_name, description=description)
 612        elif datatype == "pickle":
 613            with open(file_name + '.p', 'wb') as fb:
 614                pickle.dump(self, fb)
 615        else:
 616            raise Exception("Unknown datatype " + str(datatype))
 617
 618    def export_jackknife(self):
 619        """Export jackknife samples from the Obs
 620
 621        Returns
 622        -------
 623        numpy.ndarray
 624            Returns a numpy array of length N + 1 where N is the number of samples
 625            for the given ensemble and replicum. The zeroth entry of the array contains
 626            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 627            derived from the Obs. The current implementation only works for observables
 628            defined on exactly one ensemble and replicum. The derived jackknife samples
 629            should agree with samples from a full jackknife analysis up to O(1/N).
 630        """
 631
 632        if len(self.names) != 1:
 633            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 634
 635        name = self.names[0]
 636        full_data = self.deltas[name] + self.r_values[name]
 637        n = full_data.size
 638        mean = self.value
 639        tmp_jacks = np.zeros(n + 1)
 640        tmp_jacks[0] = mean
 641        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 642        return tmp_jacks
 643
 644    def __float__(self):
 645        return float(self.value)
 646
 647    def __repr__(self):
 648        return 'Obs[' + str(self) + ']'
 649
 650    def __str__(self):
 651        if self._dvalue == 0.0:
 652            return str(self.value)
 653        fexp = np.floor(np.log10(self._dvalue))
 654        if fexp < 0.0:
 655            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 656        elif fexp == 0.0:
 657            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
 658        else:
 659            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 660
 661    # Overload comparisons
 662    def __lt__(self, other):
 663        return self.value < other
 664
 665    def __le__(self, other):
 666        return self.value <= other
 667
 668    def __gt__(self, other):
 669        return self.value > other
 670
 671    def __ge__(self, other):
 672        return self.value >= other
 673
 674    def __eq__(self, other):
 675        return (self - other).is_zero()
 676
 677    def __ne__(self, other):
 678        return not (self - other).is_zero()
 679
 680    # Overload math operations
 681    def __add__(self, y):
 682        if isinstance(y, Obs):
 683            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 684        else:
 685            if isinstance(y, np.ndarray):
 686                return np.array([self + o for o in y])
 687            elif y.__class__.__name__ in ['Corr', 'CObs']:
 688                return NotImplemented
 689            else:
 690                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 691
 692    def __radd__(self, y):
 693        return self + y
 694
 695    def __mul__(self, y):
 696        if isinstance(y, Obs):
 697            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 698        else:
 699            if isinstance(y, np.ndarray):
 700                return np.array([self * o for o in y])
 701            elif isinstance(y, complex):
 702                return CObs(self * y.real, self * y.imag)
 703            elif y.__class__.__name__ in ['Corr', 'CObs']:
 704                return NotImplemented
 705            else:
 706                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 707
 708    def __rmul__(self, y):
 709        return self * y
 710
 711    def __sub__(self, y):
 712        if isinstance(y, Obs):
 713            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 714        else:
 715            if isinstance(y, np.ndarray):
 716                return np.array([self - o for o in y])
 717            elif y.__class__.__name__ in ['Corr', 'CObs']:
 718                return NotImplemented
 719            else:
 720                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 721
 722    def __rsub__(self, y):
 723        return -1 * (self - y)
 724
 725    def __pos__(self):
 726        return self
 727
 728    def __neg__(self):
 729        return -1 * self
 730
 731    def __truediv__(self, y):
 732        if isinstance(y, Obs):
 733            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 734        else:
 735            if isinstance(y, np.ndarray):
 736                return np.array([self / o for o in y])
 737            elif y.__class__.__name__ in ['Corr', 'CObs']:
 738                return NotImplemented
 739            else:
 740                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 741
 742    def __rtruediv__(self, y):
 743        if isinstance(y, Obs):
 744            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 745        else:
 746            if isinstance(y, np.ndarray):
 747                return np.array([o / self for o in y])
 748            elif y.__class__.__name__ in ['Corr', 'CObs']:
 749                return NotImplemented
 750            else:
 751                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 752
 753    def __pow__(self, y):
 754        if isinstance(y, Obs):
 755            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 756        else:
 757            return derived_observable(lambda x: x[0] ** y, [self])
 758
 759    def __rpow__(self, y):
 760        if isinstance(y, Obs):
 761            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 762        else:
 763            return derived_observable(lambda x: y ** x[0], [self])
 764
 765    def __abs__(self):
 766        return derived_observable(lambda x: anp.abs(x[0]), [self])
 767
 768    # Overload numpy functions
 769    def sqrt(self):
 770        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 771
 772    def log(self):
 773        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 774
 775    def exp(self):
 776        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 777
 778    def sin(self):
 779        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 780
 781    def cos(self):
 782        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 783
 784    def tan(self):
 785        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 786
 787    def arcsin(self):
 788        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 789
 790    def arccos(self):
 791        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 792
 793    def arctan(self):
 794        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 795
 796    def sinh(self):
 797        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 798
 799    def cosh(self):
 800        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 801
 802    def tanh(self):
 803        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 804
 805    def arcsinh(self):
 806        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 807
 808    def arccosh(self):
 809        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 810
 811    def arctanh(self):
 812        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 813
 814
 815class CObs:
 816    """Class for a complex valued observable."""
 817    __slots__ = ['_real', '_imag', 'tag']
 818
 819    def __init__(self, real, imag=0.0):
 820        self._real = real
 821        self._imag = imag
 822        self.tag = None
 823
 824    @property
 825    def real(self):
 826        return self._real
 827
 828    @property
 829    def imag(self):
 830        return self._imag
 831
 832    def gamma_method(self, **kwargs):
 833        """Executes the gamma_method for the real and the imaginary part."""
 834        if isinstance(self.real, Obs):
 835            self.real.gamma_method(**kwargs)
 836        if isinstance(self.imag, Obs):
 837            self.imag.gamma_method(**kwargs)
 838
 839    def is_zero(self):
 840        """Checks whether both real and imaginary part are zero within machine precision."""
 841        return self.real == 0.0 and self.imag == 0.0
 842
 843    def conjugate(self):
 844        return CObs(self.real, -self.imag)
 845
 846    def __add__(self, other):
 847        if isinstance(other, np.ndarray):
 848            return other + self
 849        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 850            return CObs(self.real + other.real,
 851                        self.imag + other.imag)
 852        else:
 853            return CObs(self.real + other, self.imag)
 854
 855    def __radd__(self, y):
 856        return self + y
 857
 858    def __sub__(self, other):
 859        if isinstance(other, np.ndarray):
 860            return -1 * (other - self)
 861        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 862            return CObs(self.real - other.real, self.imag - other.imag)
 863        else:
 864            return CObs(self.real - other, self.imag)
 865
 866    def __rsub__(self, other):
 867        return -1 * (self - other)
 868
 869    def __mul__(self, other):
 870        if isinstance(other, np.ndarray):
 871            return other * self
 872        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 873            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 874                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 875                                               [self.real, other.real, self.imag, other.imag],
 876                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 877                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 878                                               [self.real, other.real, self.imag, other.imag],
 879                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 880            elif getattr(other, 'imag', 0) != 0:
 881                return CObs(self.real * other.real - self.imag * other.imag,
 882                            self.imag * other.real + self.real * other.imag)
 883            else:
 884                return CObs(self.real * other.real, self.imag * other.real)
 885        else:
 886            return CObs(self.real * other, self.imag * other)
 887
 888    def __rmul__(self, other):
 889        return self * other
 890
 891    def __truediv__(self, other):
 892        if isinstance(other, np.ndarray):
 893            return 1 / (other / self)
 894        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 895            r = other.real ** 2 + other.imag ** 2
 896            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 897        else:
 898            return CObs(self.real / other, self.imag / other)
 899
 900    def __rtruediv__(self, other):
 901        r = self.real ** 2 + self.imag ** 2
 902        if hasattr(other, 'real') and hasattr(other, 'imag'):
 903            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 904        else:
 905            return CObs(self.real * other / r, -self.imag * other / r)
 906
 907    def __abs__(self):
 908        return np.sqrt(self.real**2 + self.imag**2)
 909
 910    def __pos__(self):
 911        return self
 912
 913    def __neg__(self):
 914        return -1 * self
 915
 916    def __eq__(self, other):
 917        return self.real == other.real and self.imag == other.imag
 918
 919    def __str__(self):
 920        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 921
 922    def __repr__(self):
 923        return 'CObs[' + str(self) + ']'
 924
 925
 926def _expand_deltas(deltas, idx, shape):
 927    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 928       If idx is of type range, the deltas are not changed
 929
 930    Parameters
 931    ----------
 932    deltas : list
 933        List of fluctuations
 934    idx : list
 935        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 936    shape : int
 937        Number of configs in idx.
 938    """
 939    if isinstance(idx, range):
 940        return deltas
 941    else:
 942        ret = np.zeros(idx[-1] - idx[0] + 1)
 943        for i in range(shape):
 944            ret[idx[i] - idx[0]] = deltas[i]
 945        return ret
 946
 947
 948def _merge_idx(idl):
 949    """Returns the union of all lists in idl as sorted list
 950
 951    Parameters
 952    ----------
 953    idl : list
 954        List of lists or ranges.
 955    """
 956
 957    # Use groupby to efficiently check whether all elements of idl are identical
 958    try:
 959        g = groupby(idl)
 960        if next(g, True) and not next(g, False):
 961            return idl[0]
 962    except Exception:
 963        pass
 964
 965    if np.all([type(idx) is range for idx in idl]):
 966        if len(set([idx[0] for idx in idl])) == 1:
 967            idstart = min([idx.start for idx in idl])
 968            idstop = max([idx.stop for idx in idl])
 969            idstep = min([idx.step for idx in idl])
 970            return range(idstart, idstop, idstep)
 971
 972    return sorted(set().union(*idl))
 973
 974
 975def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
 976    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
 977       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
 978       common divisor of the step sizes is used as new step size.
 979
 980    Parameters
 981    ----------
 982    deltas : list
 983        List of fluctuations
 984    idx : list
 985        List or range of configs on which the deltas are defined.
 986        Has to be a subset of new_idx and has to be sorted in ascending order.
 987    shape : list
 988        Number of configs in idx.
 989    new_idx : list
 990        List of configs that defines the new range, has to be sorted in ascending order.
 991    """
 992
 993    if type(idx) is range and type(new_idx) is range:
 994        if idx == new_idx:
 995            return deltas
 996    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
 997    for i in range(shape):
 998        ret[idx[i] - new_idx[0]] = deltas[i]
 999    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1000
1001
1002def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1003    """Filter out all configurations with vanishing fluctuation such that they do not
1004       contribute to the error estimate anymore. Returns the new deltas and
1005       idx according to the filtering.
1006       A fluctuation is considered to be vanishing, if it is smaller than eps times
1007       the mean of the absolute values of all deltas in one list.
1008
1009    Parameters
1010    ----------
1011    deltas : list
1012        List of fluctuations
1013    idx : list
1014        List or ranges of configs on which the deltas are defined.
1015    eps : float
1016        Prefactor that enters the filter criterion.
1017    """
1018    new_deltas = []
1019    new_idx = []
1020    maxd = np.mean(np.fabs(deltas))
1021    for i in range(len(deltas)):
1022        if abs(deltas[i]) > eps * maxd:
1023            new_deltas.append(deltas[i])
1024            new_idx.append(idx[i])
1025    if new_idx:
1026        return np.array(new_deltas), new_idx
1027    else:
1028        return deltas, idx
1029
1030
1031def derived_observable(func, data, array_mode=False, **kwargs):
1032    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1033
1034    Parameters
1035    ----------
1036    func : object
1037        arbitrary function of the form func(data, **kwargs). For the
1038        automatic differentiation to work, all numpy functions have to have
1039        the autograd wrapper (use 'import autograd.numpy as anp').
1040    data : list
1041        list of Obs, e.g. [obs1, obs2, obs3].
1042    num_grad : bool
1043        if True, numerical derivatives are used instead of autograd
1044        (default False). To control the numerical differentiation the
1045        kwargs of numdifftools.step_generators.MaxStepGenerator
1046        can be used.
1047    man_grad : list
1048        manually supply a list or an array which contains the jacobian
1049        of func. Use cautiously, supplying the wrong derivative will
1050        not be intercepted.
1051
1052    Notes
1053    -----
1054    For simple mathematical operations it can be practical to use anonymous
1055    functions. For the ratio of two observables one can e.g. use
1056
1057    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1058    """
1059
1060    data = np.asarray(data)
1061    raveled_data = data.ravel()
1062
1063    # Workaround for matrix operations containing non Obs data
1064    if not all(isinstance(x, Obs) for x in raveled_data):
1065        for i in range(len(raveled_data)):
1066            if isinstance(raveled_data[i], (int, float)):
1067                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1068
1069    allcov = {}
1070    for o in raveled_data:
1071        for name in o.cov_names:
1072            if name in allcov:
1073                if not np.allclose(allcov[name], o.covobs[name].cov):
1074                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1075            else:
1076                allcov[name] = o.covobs[name].cov
1077
1078    n_obs = len(raveled_data)
1079    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1080    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1081    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1082
1083    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1084    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1085
1086    if data.ndim == 1:
1087        values = np.array([o.value for o in data])
1088    else:
1089        values = np.vectorize(lambda x: x.value)(data)
1090
1091    new_values = func(values, **kwargs)
1092
1093    multi = int(isinstance(new_values, np.ndarray))
1094
1095    new_r_values = {}
1096    new_idl_d = {}
1097    for name in new_sample_names:
1098        idl = []
1099        tmp_values = np.zeros(n_obs)
1100        for i, item in enumerate(raveled_data):
1101            tmp_values[i] = item.r_values.get(name, item.value)
1102            tmp_idl = item.idl.get(name)
1103            if tmp_idl is not None:
1104                idl.append(tmp_idl)
1105        if multi > 0:
1106            tmp_values = np.array(tmp_values).reshape(data.shape)
1107        new_r_values[name] = func(tmp_values, **kwargs)
1108        new_idl_d[name] = _merge_idx(idl)
1109        if not is_merged[name]:
1110            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1111
1112    if 'man_grad' in kwargs:
1113        deriv = np.asarray(kwargs.get('man_grad'))
1114        if new_values.shape + data.shape != deriv.shape:
1115            raise Exception('Manual derivative does not have correct shape.')
1116    elif kwargs.get('num_grad') is True:
1117        if multi > 0:
1118            raise Exception('Multi mode currently not supported for numerical derivative')
1119        options = {
1120            'base_step': 0.1,
1121            'step_ratio': 2.5}
1122        for key in options.keys():
1123            kwarg = kwargs.get(key)
1124            if kwarg is not None:
1125                options[key] = kwarg
1126        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1127        if tmp_df.size == 1:
1128            deriv = np.array([tmp_df.real])
1129        else:
1130            deriv = tmp_df.real
1131    else:
1132        deriv = jacobian(func)(values, **kwargs)
1133
1134    final_result = np.zeros(new_values.shape, dtype=object)
1135
1136    if array_mode is True:
1137
1138        class _Zero_grad():
1139            def __init__(self, N):
1140                self.grad = np.zeros((N, 1))
1141
1142        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1143        d_extracted = {}
1144        g_extracted = {}
1145        for name in new_sample_names:
1146            d_extracted[name] = []
1147            ens_length = len(new_idl_d[name])
1148            for i_dat, dat in enumerate(data):
1149                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1150        for name in new_cov_names:
1151            g_extracted[name] = []
1152            zero_grad = _Zero_grad(new_covobs_lengths[name])
1153            for i_dat, dat in enumerate(data):
1154                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1155
1156    for i_val, new_val in np.ndenumerate(new_values):
1157        new_deltas = {}
1158        new_grad = {}
1159        if array_mode is True:
1160            for name in new_sample_names:
1161                ens_length = d_extracted[name][0].shape[-1]
1162                new_deltas[name] = np.zeros(ens_length)
1163                for i_dat, dat in enumerate(d_extracted[name]):
1164                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1165            for name in new_cov_names:
1166                new_grad[name] = 0
1167                for i_dat, dat in enumerate(g_extracted[name]):
1168                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1169        else:
1170            for j_obs, obs in np.ndenumerate(data):
1171                for name in obs.names:
1172                    if name in obs.cov_names:
1173                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1174                    else:
1175                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1176
1177        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1178
1179        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1180            raise Exception('The same name has been used for deltas and covobs!')
1181        new_samples = []
1182        new_means = []
1183        new_idl = []
1184        new_names_obs = []
1185        for name in new_names:
1186            if name not in new_covobs:
1187                if is_merged[name]:
1188                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1189                else:
1190                    filtered_deltas = new_deltas[name]
1191                    filtered_idl_d = new_idl_d[name]
1192
1193                new_samples.append(filtered_deltas)
1194                new_idl.append(filtered_idl_d)
1195                new_means.append(new_r_values[name][i_val])
1196                new_names_obs.append(name)
1197        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1198        for name in new_covobs:
1199            final_result[i_val].names.append(name)
1200        final_result[i_val]._covobs = new_covobs
1201        final_result[i_val]._value = new_val
1202        final_result[i_val].is_merged = is_merged
1203        final_result[i_val].reweighted = reweighted
1204
1205    if multi == 0:
1206        final_result = final_result.item()
1207
1208    return final_result
1209
1210
1211def _reduce_deltas(deltas, idx_old, idx_new):
1212    """Extract deltas defined on idx_old on all configs of idx_new.
1213
1214    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1215    are ordered in an ascending order.
1216
1217    Parameters
1218    ----------
1219    deltas : list
1220        List of fluctuations
1221    idx_old : list
1222        List or range of configs on which the deltas are defined
1223    idx_new : list
1224        List of configs for which we want to extract the deltas.
1225        Has to be a subset of idx_old.
1226    """
1227    if not len(deltas) == len(idx_old):
1228        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1229    if type(idx_old) is range and type(idx_new) is range:
1230        if idx_old == idx_new:
1231            return deltas
1232    shape = len(idx_new)
1233    ret = np.zeros(shape)
1234    oldpos = 0
1235    for i in range(shape):
1236        pos = -1
1237        for j in range(oldpos, len(idx_old)):
1238            if idx_old[j] == idx_new[i]:
1239                pos = j
1240                break
1241        if pos < 0:
1242            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
1243        ret[i] = deltas[pos]
1244        oldpos = pos
1245    return np.array(ret)
1246
1247
1248def reweight(weight, obs, **kwargs):
1249    """Reweight a list of observables.
1250
1251    Parameters
1252    ----------
1253    weight : Obs
1254        Reweighting factor. An Observable that has to be defined on a superset of the
1255        configurations in obs[i].idl for all i.
1256    obs : list
1257        list of Obs, e.g. [obs1, obs2, obs3].
1258    all_configs : bool
1259        if True, the reweighted observables are normalized by the average of
1260        the reweighting factor on all configurations in weight.idl and not
1261        on the configurations in obs[i].idl.
1262    """
1263    result = []
1264    for i in range(len(obs)):
1265        if len(obs[i].cov_names):
1266            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1267        if not set(obs[i].names).issubset(weight.names):
1268            raise Exception('Error: Ensembles do not fit')
1269        for name in obs[i].names:
1270            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1271                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1272        new_samples = []
1273        w_deltas = {}
1274        for name in sorted(obs[i].names):
1275            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1276            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1277        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1278
1279        if kwargs.get('all_configs'):
1280            new_weight = weight
1281        else:
1282            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1283
1284        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
1285        result[-1].reweighted = True
1286        result[-1].is_merged = obs[i].is_merged
1287
1288    return result
1289
1290
1291def correlate(obs_a, obs_b):
1292    """Correlate two observables.
1293
1294    Parameters
1295    ----------
1296    obs_a : Obs
1297        First observable
1298    obs_b : Obs
1299        Second observable
1300
1301    Notes
1302    -----
1303    Keep in mind to only correlate primary observables which have not been reweighted
1304    yet. The reweighting has to be applied after correlating the observables.
1305    Currently only works if ensembles are identical (this is not strictly necessary).
1306    """
1307
1308    if sorted(obs_a.names) != sorted(obs_b.names):
1309        raise Exception('Ensembles do not fit')
1310    if len(obs_a.cov_names) or len(obs_b.cov_names):
1311        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1312    for name in obs_a.names:
1313        if obs_a.shape[name] != obs_b.shape[name]:
1314            raise Exception('Shapes of ensemble', name, 'do not fit')
1315        if obs_a.idl[name] != obs_b.idl[name]:
1316            raise Exception('idl of ensemble', name, 'do not fit')
1317
1318    if obs_a.reweighted is True:
1319        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1320    if obs_b.reweighted is True:
1321        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1322
1323    new_samples = []
1324    new_idl = []
1325    for name in sorted(obs_a.names):
1326        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1327        new_idl.append(obs_a.idl[name])
1328
1329    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1330    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1331    o.reweighted = obs_a.reweighted or obs_b.reweighted
1332    return o
1333
1334
1335def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1336    r'''Calculates the covariance matrix of a set of observables.
1337
1338    The gamma method has to be applied first to all observables.
1339
1340    Parameters
1341    ----------
1342    obs : list or numpy.ndarray
1343        List or one dimensional array of Obs
1344    visualize : bool
1345        If True plots the corresponding normalized correlation matrix (default False).
1346    correlation : bool
1347        If True the correlation instead of the covariance is returned (default False).
1348    smooth : None or int
1349        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1350        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1351        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1352        small ones.
1353
1354    Notes
1355    -----
1356    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1357    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1358    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1359    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1360    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1361    '''
1362
1363    length = len(obs)
1364
1365    max_samples = np.max([o.N for o in obs])
1366    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1367        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1368
1369    cov = np.zeros((length, length))
1370    for i in range(length):
1371        for j in range(i, length):
1372            cov[i, j] = _covariance_element(obs[i], obs[j])
1373    cov = cov + cov.T - np.diag(np.diag(cov))
1374
1375    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1376
1377    if isinstance(smooth, int):
1378        corr = _smooth_eigenvalues(corr, smooth)
1379
1380    errors = [o.dvalue for o in obs]
1381    cov = np.diag(errors) @ corr @ np.diag(errors)
1382
1383    eigenvalues = np.linalg.eigh(cov)[0]
1384    if not np.all(eigenvalues >= 0):
1385        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1386
1387    if visualize:
1388        plt.matshow(corr, vmin=-1, vmax=1)
1389        plt.set_cmap('RdBu')
1390        plt.colorbar()
1391        plt.draw()
1392
1393    if correlation is True:
1394        return corr
1395    else:
1396        return cov
1397
1398
1399def _smooth_eigenvalues(corr, E):
1400    """Eigenvalue smoothing as described in hep-lat/9412087
1401
1402    corr : np.ndarray
1403        correlation matrix
1404    E : integer
1405        Number of eigenvalues to be left substantially unchanged
1406    """
1407    if not (2 < E < corr.shape[0] - 1):
1408        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1409    vals, vec = np.linalg.eigh(corr)
1410    lambda_min = np.mean(vals[:-E])
1411    vals[vals < lambda_min] = lambda_min
1412    vals /= np.mean(vals)
1413    return vec @ np.diag(vals) @ vec.T
1414
1415
1416def _covariance_element(obs1, obs2):
1417    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1418
1419    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1420        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
1421        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
1422        return np.sum(deltas1 * deltas2)
1423
1424    if set(obs1.names).isdisjoint(set(obs2.names)):
1425        return 0.0
1426
1427    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1428        raise Exception('The gamma method has to be applied to both Obs first.')
1429
1430    dvalue = 0.0
1431
1432    for e_name in obs1.mc_names:
1433
1434        if e_name not in obs2.mc_names:
1435            continue
1436
1437        idl_d = {}
1438        for r_name in obs1.e_content[e_name]:
1439            if r_name not in obs2.e_content[e_name]:
1440                continue
1441            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])
1442
1443        gamma = 0.0
1444
1445        for r_name in obs1.e_content[e_name]:
1446            if r_name not in obs2.e_content[e_name]:
1447                continue
1448            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1449
1450        if gamma == 0.0:
1451            continue
1452
1453        gamma_div = 0.0
1454        e_N = 0
1455        for r_name in obs1.e_content[e_name]:
1456            if r_name not in obs2.e_content[e_name]:
1457                continue
1458            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1459            e_N += len(idl_d[r_name])
1460        gamma /= max(gamma_div, 1.0)
1461
1462        # Bias correction hep-lat/0306017 eq. (49)
1463        dvalue += (1 + 1 / e_N) * gamma / e_N
1464
1465    for e_name in obs1.cov_names:
1466
1467        if e_name not in obs2.cov_names:
1468            continue
1469
1470        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1471
1472    return dvalue
1473
1474
1475def import_jackknife(jacks, name, idl=None):
1476    """Imports jackknife samples and returns an Obs
1477
1478    Parameters
1479    ----------
1480    jacks : numpy.ndarray
1481        numpy array containing the mean value as zeroth entry and
1482        the N jackknife samples as first to Nth entry.
1483    name : str
1484        name of the ensemble the samples are defined on.
1485    """
1486    length = len(jacks) - 1
1487    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1488    samples = jacks[1:] @ prj
1489    mean = np.mean(samples)
1490    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1491    new_obs._value = jacks[0]
1492    return new_obs
1493
1494
1495def merge_obs(list_of_obs):
1496    """Combine all observables in list_of_obs into one new observable
1497
1498    Parameters
1499    ----------
1500    list_of_obs : list
1501        list of the Obs object to be combined
1502
1503    Notes
1504    -----
1505    It is not possible to combine obs which are based on the same replicum
1506    """
1507    replist = [item for obs in list_of_obs for item in obs.names]
1508    if (len(replist) == len(set(replist))) is False:
1509        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1510    if any([len(o.cov_names) for o in list_of_obs]):
1511        raise Exception('Not possible to merge data that contains covobs!')
1512    new_dict = {}
1513    idl_dict = {}
1514    for o in list_of_obs:
1515        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1516                        for key in set(o.deltas) | set(o.r_values)})
1517        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1518
1519    names = sorted(new_dict.keys())
1520    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1521    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1522    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1523    return o
1524
1525
1526def cov_Obs(means, cov, name, grad=None):
1527    """Create an Obs based on mean(s) and a covariance matrix
1528
1529    Parameters
1530    ----------
1531    mean : list of floats or float
1532        N mean value(s) of the new Obs
1533    cov : list or array
1534        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1535    name : str
1536        identifier for the covariance matrix
1537    grad : list or array
1538        Gradient of the Covobs wrt. the means belonging to cov.
1539    """
1540
1541    def covobs_to_obs(co):
1542        """Make an Obs out of a Covobs
1543
1544        Parameters
1545        ----------
1546        co : Covobs
1547            Covobs to be embedded into the Obs
1548        """
1549        o = Obs([], [], means=[])
1550        o._value = co.value
1551        o.names.append(co.name)
1552        o._covobs[co.name] = co
1553        o._dvalue = np.sqrt(co.errsq())
1554        return o
1555
1556    ol = []
1557    if isinstance(means, (float, int)):
1558        means = [means]
1559
1560    for i in range(len(means)):
1561        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1562    if ol[0].covobs[name].N != len(means):
1563        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1564    if len(ol) == 1:
1565        return ol[0]
1566    return ol
#   class Obs:
View Source
 13class Obs:
 14    """Class for a general observable.
 15
 16    Instances of Obs are the basic objects of a pyerrors error analysis.
 17    They are initialized with a list which contains arrays of samples for
 18    different ensembles/replica and another list of same length which contains
 19    the names of the ensembles/replica. Mathematical operations can be
 20    performed on instances. The result is another instance of Obs. The error of
 21    an instance can be computed with the gamma_method. Also contains additional
 22    methods for output and visualization of the error calculation.
 23
 24    Attributes
 25    ----------
 26    S_global : float
 27        Standard value for S (default 2.0)
 28    S_dict : dict
 29        Dictionary for S values. If an entry for a given ensemble
 30        exists this overwrites the standard value for that ensemble.
 31    tau_exp_global : float
 32        Standard value for tau_exp (default 0.0)
 33    tau_exp_dict : dict
 34        Dictionary for tau_exp values. If an entry for a given ensemble exists
 35        this overwrites the standard value for that ensemble.
 36    N_sigma_global : float
 37        Standard value for N_sigma (default 1.0)
 38    N_sigma_dict : dict
 39        Dictionary for N_sigma values. If an entry for a given ensemble exists
 40        this overwrites the standard value for that ensemble.
 41    """
 42    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 43                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 44                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 45                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 46                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 47
 48    S_global = 2.0
 49    S_dict = {}
 50    tau_exp_global = 0.0
 51    tau_exp_dict = {}
 52    N_sigma_global = 1.0
 53    N_sigma_dict = {}
 54    filter_eps = 1e-10
 55
 56    def __init__(self, samples, names, idl=None, **kwargs):
 57        """ Initialize Obs object.
 58
 59        Parameters
 60        ----------
 61        samples : list
 62            list of numpy arrays containing the Monte Carlo samples
 63        names : list
 64            list of strings labeling the individual samples
 65        idl : list, optional
 66            list of ranges or lists on which the samples are defined
 67        """
 68
 69        if kwargs.get("means") is None and len(samples):
 70            if len(samples) != len(names):
 71                raise Exception('Length of samples and names incompatible.')
 72            if idl is not None:
 73                if len(idl) != len(names):
 74                    raise Exception('Length of idl incompatible with samples and names.')
 75            name_length = len(names)
 76            if name_length > 1:
 77                if name_length != len(set(names)):
 78                    raise Exception('names are not unique.')
 79                if not all(isinstance(x, str) for x in names):
 80                    raise TypeError('All names have to be strings.')
 81            else:
 82                if not isinstance(names[0], str):
 83                    raise TypeError('All names have to be strings.')
 84            if min(len(x) for x in samples) <= 4:
 85                raise Exception('Samples have to have at least 5 entries.')
 86
 87        self.names = sorted(names)
 88        self.shape = {}
 89        self.r_values = {}
 90        self.deltas = {}
 91        self._covobs = {}
 92
 93        self._value = 0
 94        self.N = 0
 95        self.is_merged = {}
 96        self.idl = {}
 97        if idl is not None:
 98            for name, idx in sorted(zip(names, idl)):
 99                if isinstance(idx, range):
100                    self.idl[name] = idx
101                elif isinstance(idx, (list, np.ndarray)):
102                    dc = np.unique(np.diff(idx))
103                    if np.any(dc < 0):
104                        raise Exception("Unsorted idx for idl[%s]" % (name))
105                    if len(dc) == 1:
106                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
107                    else:
108                        self.idl[name] = list(idx)
109                else:
110                    raise Exception('incompatible type for idl[%s].' % (name))
111        else:
112            for name, sample in sorted(zip(names, samples)):
113                self.idl[name] = range(1, len(sample) + 1)
114
115        if kwargs.get("means") is not None:
116            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
117                self.shape[name] = len(self.idl[name])
118                self.N += self.shape[name]
119                self.r_values[name] = mean
120                self.deltas[name] = sample
121        else:
122            for name, sample in sorted(zip(names, samples)):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                if len(sample) != self.shape[name]:
126                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
127                self.r_values[name] = np.mean(sample)
128                self.deltas[name] = sample - self.r_values[name]
129                self._value += self.shape[name] * self.r_values[name]
130            self._value /= self.N
131
132        self._dvalue = 0.0
133        self.ddvalue = 0.0
134        self.reweighted = False
135
136        self.tag = None
137
138    @property
139    def value(self):
140        return self._value
141
142    @property
143    def dvalue(self):
144        return self._dvalue
145
146    @property
147    def e_names(self):
148        return sorted(set([o.split('|')[0] for o in self.names]))
149
150    @property
151    def cov_names(self):
152        return sorted(set([o for o in self.covobs.keys()]))
153
154    @property
155    def mc_names(self):
156        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
157
158    @property
159    def e_content(self):
160        res = {}
161        for e, e_name in enumerate(self.e_names):
162            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
163            if e_name in self.names:
164                res[e_name].append(e_name)
165        return res
166
167    @property
168    def covobs(self):
169        return self._covobs
170
171    def gamma_method(self, **kwargs):
172        """Estimate the error and related properties of the Obs.
173
174        Parameters
175        ----------
176        S : float
177            specifies a custom value for the parameter S (default 2.0).
178            If set to 0 it is assumed that the data exhibits no
179            autocorrelation. In this case the error estimates coincides
180            with the sample standard error.
181        tau_exp : float
182            positive value triggers the critical slowing down analysis
183            (default 0.0).
184        N_sigma : float
185            number of standard deviations from zero until the tail is
186            attached to the autocorrelation function (default 1).
187        fft : bool
188            determines whether the fft algorithm is used for the computation
189            of the autocorrelation function (default True)
190        """
191
192        e_content = self.e_content
193        self.e_dvalue = {}
194        self.e_ddvalue = {}
195        self.e_tauint = {}
196        self.e_dtauint = {}
197        self.e_windowsize = {}
198        self.e_n_tauint = {}
199        self.e_n_dtauint = {}
200        e_gamma = {}
201        self.e_rho = {}
202        self.e_drho = {}
203        self._dvalue = 0
204        self.ddvalue = 0
205
206        self.S = {}
207        self.tau_exp = {}
208        self.N_sigma = {}
209
210        if kwargs.get('fft') is False:
211            fft = False
212        else:
213            fft = True
214
215        def _parse_kwarg(kwarg_name):
216            if kwarg_name in kwargs:
217                tmp = kwargs.get(kwarg_name)
218                if isinstance(tmp, (int, float)):
219                    if tmp < 0:
220                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
221                    for e, e_name in enumerate(self.e_names):
222                        getattr(self, kwarg_name)[e_name] = tmp
223                else:
224                    raise TypeError(kwarg_name + ' is not in proper format.')
225            else:
226                for e, e_name in enumerate(self.e_names):
227                    if e_name in getattr(Obs, kwarg_name + '_dict'):
228                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
229                    else:
230                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
231
232        _parse_kwarg('S')
233        _parse_kwarg('tau_exp')
234        _parse_kwarg('N_sigma')
235
236        for e, e_name in enumerate(self.mc_names):
237            r_length = []
238            for r_name in e_content[e_name]:
239                if isinstance(self.idl[r_name], range):
240                    r_length.append(len(self.idl[r_name]))
241                else:
242                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
243
244            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
245            w_max = max(r_length) // 2
246            e_gamma[e_name] = np.zeros(w_max)
247            self.e_rho[e_name] = np.zeros(w_max)
248            self.e_drho[e_name] = np.zeros(w_max)
249
250            for r_name in e_content[e_name]:
251                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
252
253            gamma_div = np.zeros(w_max)
254            for r_name in e_content[e_name]:
255                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
256            gamma_div[gamma_div < 1] = 1.0
257            e_gamma[e_name] /= gamma_div[:w_max]
258
259            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
260                self.e_tauint[e_name] = 0.5
261                self.e_dtauint[e_name] = 0.0
262                self.e_dvalue[e_name] = 0.0
263                self.e_ddvalue[e_name] = 0.0
264                self.e_windowsize[e_name] = 0
265                continue
266
267            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
268            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
269            # Make sure no entry of tauint is smaller than 0.5
270            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
271            # hep-lat/0306017 eq. (42)
272            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
273            self.e_n_dtauint[e_name][0] = 0.0
274
275            def _compute_drho(i):
276                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
277                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
278
279            _compute_drho(1)
280            if self.tau_exp[e_name] > 0:
281                texp = self.tau_exp[e_name]
282                # Critical slowing down analysis
283                if w_max // 2 <= 1:
284                    raise Exception("Need at least 8 samples for tau_exp error analysis")
285                for n in range(1, w_max // 2):
286                    _compute_drho(n + 1)
287                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
288                        # Bias correction hep-lat/0306017 eq. (49) included
289                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
290                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
291                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
292                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
293                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
294                        self.e_windowsize[e_name] = n
295                        break
296            else:
297                if self.S[e_name] == 0.0:
298                    self.e_tauint[e_name] = 0.5
299                    self.e_dtauint[e_name] = 0.0
300                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
301                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
302                    self.e_windowsize[e_name] = 0
303                else:
304                    # Standard automatic windowing procedure
305                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
306                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
307                    for n in range(1, w_max):
308                        if n < w_max // 2 - 2:
309                            _compute_drho(n + 1)
310                        if g_w[n - 1] < 0 or n >= w_max - 1:
311                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
312                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
313                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
314                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
315                            self.e_windowsize[e_name] = n
316                            break
317
318            self._dvalue += self.e_dvalue[e_name] ** 2
319            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
320
321        for e_name in self.cov_names:
322            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
323            self.e_ddvalue[e_name] = 0
324            self._dvalue += self.e_dvalue[e_name]**2
325
326        self._dvalue = np.sqrt(self._dvalue)
327        if self._dvalue == 0.0:
328            self.ddvalue = 0.0
329        else:
330            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
331        return
332
333    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
334        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
335           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
336
337        Parameters
338        ----------
339        deltas : list
340            List of fluctuations
341        idx : list
342            List or range of configurations on which the deltas are defined.
343        shape : int
344            Number of configurations in idx.
345        w_max : int
346            Upper bound for the summation window.
347        fft : bool
348            determines whether the fft algorithm is used for the computation
349            of the autocorrelation function.
350        """
351        gamma = np.zeros(w_max)
352        deltas = _expand_deltas(deltas, idx, shape)
353        new_shape = len(deltas)
354        if fft:
355            max_gamma = min(new_shape, w_max)
356            # The padding for the fft has to be even
357            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
358            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
359        else:
360            for n in range(w_max):
361                if new_shape - n >= 0:
362                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
363
364        return gamma
365
366    def details(self, ens_content=True):
367        """Output detailed properties of the Obs.
368
369        Parameters
370        ----------
371        ens_content : bool
372            print details about the ensembles and replica if true.
373        """
374        if self.tag is not None:
375            print("Description:", self.tag)
376        if not hasattr(self, 'e_dvalue'):
377            print('Result\t %3.8e' % (self.value))
378        else:
379            if self.value == 0.0:
380                percentage = np.nan
381            else:
382                percentage = np.abs(self._dvalue / self.value) * 100
383            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
384            if len(self.e_names) > 1:
385                print(' Ensemble errors:')
386            for e_name in self.mc_names:
387                if len(self.e_names) > 1:
388                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
389                if self.tau_exp[e_name] > 0:
390                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
391                else:
392                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
393            for e_name in self.cov_names:
394                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
395        if ens_content is True:
396            if len(self.e_names) == 1:
397                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
398            else:
399                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
400            my_string_list = []
401            for key, value in sorted(self.e_content.items()):
402                if key not in self.covobs:
403                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
404                    if len(value) == 1:
405                        my_string += f': {self.shape[value[0]]} configurations'
406                        if isinstance(self.idl[value[0]], range):
407                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
408                        else:
409                            my_string += ' (irregular range)'
410                    else:
411                        sublist = []
412                        for v in value:
413                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
414                            my_substring += f': {self.shape[v]} configurations'
415                            if isinstance(self.idl[v], range):
416                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
417                            else:
418                                my_substring += ' (irregular range)'
419                            sublist.append(my_substring)
420
421                        my_string += '\n' + '\n'.join(sublist)
422                else:
423                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
424                my_string_list.append(my_string)
425            print('\n'.join(my_string_list))
426
427    def is_zero_within_error(self, sigma=1):
428        """Checks whether the observable is zero within 'sigma' standard errors.
429
430        Parameters
431        ----------
432        sigma : int
433            Number of standard errors used for the check.
434
435        Works only properly when the gamma method was run.
436        """
437        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
438
439    def is_zero(self, atol=1e-10):
440        """Checks whether the observable is zero within a given tolerance.
441
442        Parameters
443        ----------
444        atol : float
445            Absolute tolerance (for details see numpy documentation).
446        """
447        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
448
449    def plot_tauint(self, save=None):
450        """Plot integrated autocorrelation time for each ensemble.
451
452        Parameters
453        ----------
454        save : str
455            saves the figure to a file named 'save' if.
456        """
457        if not hasattr(self, 'e_dvalue'):
458            raise Exception('Run the gamma method first.')
459
460        for e, e_name in enumerate(self.mc_names):
461            fig = plt.figure()
462            plt.xlabel(r'$W$')
463            plt.ylabel(r'$\tau_\mathrm{int}$')
464            length = int(len(self.e_n_tauint[e_name]))
465            if self.tau_exp[e_name] > 0:
466                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
467                x_help = np.arange(2 * self.tau_exp[e_name])
468                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
469                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
470                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
471                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
472                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
473                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
474                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
475            else:
476                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
477                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
478
479            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
480            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
481            plt.legend()
482            plt.xlim(-0.5, xmax)
483            ylim = plt.ylim()
484            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
485            plt.draw()
486            if save:
487                fig.savefig(save + "_" + str(e))
488
489    def plot_rho(self, save=None):
490        """Plot normalized autocorrelation function time for each ensemble.
491
492        Parameters
493        ----------
494        save : str
495            saves the figure to a file named 'save' if.
496        """
497        if not hasattr(self, 'e_dvalue'):
498            raise Exception('Run the gamma method first.')
499        for e, e_name in enumerate(self.mc_names):
500            fig = plt.figure()
501            plt.xlabel('W')
502            plt.ylabel('rho')
503            length = int(len(self.e_drho[e_name]))
504            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
505            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
506            if self.tau_exp[e_name] > 0:
507                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
508                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
509                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
510                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
511            else:
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
514            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
515            plt.xlim(-0.5, xmax)
516            plt.draw()
517            if save:
518                fig.savefig(save + "_" + str(e))
519
520    def plot_rep_dist(self):
521        """Plot replica distribution for each ensemble with more than one replicum."""
522        if not hasattr(self, 'e_dvalue'):
523            raise Exception('Run the gamma method first.')
524        for e, e_name in enumerate(self.mc_names):
525            if len(self.e_content[e_name]) == 1:
526                print('No replica distribution for a single replicum (', e_name, ')')
527                continue
528            r_length = []
529            sub_r_mean = 0
530            for r, r_name in enumerate(self.e_content[e_name]):
531                r_length.append(len(self.deltas[r_name]))
532                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
533            e_N = np.sum(r_length)
534            sub_r_mean /= e_N
535            arr = np.zeros(len(self.e_content[e_name]))
536            for r, r_name in enumerate(self.e_content[e_name]):
537                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
538            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
539            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
540            plt.draw()
541
542    def plot_history(self, expand=True):
543        """Plot derived Monte Carlo history for each ensemble
544
545        Parameters
546        ----------
547        expand : bool
548            show expanded history for irregular Monte Carlo chains (default: True).
549        """
550        for e, e_name in enumerate(self.mc_names):
551            plt.figure()
552            r_length = []
553            tmp = []
554            tmp_expanded = []
555            for r, r_name in enumerate(self.e_content[e_name]):
556                tmp.append(self.deltas[r_name] + self.r_values[r_name])
557                if expand:
558                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
559                    r_length.append(len(tmp_expanded[-1]))
560                else:
561                    r_length.append(len(tmp[-1]))
562            e_N = np.sum(r_length)
563            x = np.arange(e_N)
564            y_test = np.concatenate(tmp, axis=0)
565            if expand:
566                y = np.concatenate(tmp_expanded, axis=0)
567            else:
568                y = y_test
569            plt.errorbar(x, y, fmt='.', markersize=3)
570            plt.xlim(-0.5, e_N - 0.5)
571            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
572            plt.draw()
573
574    def plot_piechart(self):
575        """Plot piechart which shows the fractional contribution of each
576        ensemble to the error and returns a dictionary containing the fractions."""
577        if not hasattr(self, 'e_dvalue'):
578            raise Exception('Run the gamma method first.')
579        if np.isclose(0.0, self._dvalue, atol=1e-15):
580            raise Exception('Error is 0.0')
581        labels = self.e_names
582        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
583        fig1, ax1 = plt.subplots()
584        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
585        ax1.axis('equal')
586        plt.draw()
587
588        return dict(zip(self.e_names, sizes))
589
590    def dump(self, filename, datatype="json.gz", description="", **kwargs):
591        """Dump the Obs to a file 'name' of chosen format.
592
593        Parameters
594        ----------
595        filename : str
596            name of the file to be saved.
597        datatype : str
598            Format of the exported file. Supported formats include
599            "json.gz" and "pickle"
600        description : str
601            Description for output file, only relevant for json.gz format.
602        path : str
603            specifies a custom path for the file (default '.')
604        """
605        if 'path' in kwargs:
606            file_name = kwargs.get('path') + '/' + filename
607        else:
608            file_name = filename
609
610        if datatype == "json.gz":
611            from .input.json import dump_to_json
612            dump_to_json([self], file_name, description=description)
613        elif datatype == "pickle":
614            with open(file_name + '.p', 'wb') as fb:
615                pickle.dump(self, fb)
616        else:
617            raise Exception("Unknown datatype " + str(datatype))
618
619    def export_jackknife(self):
620        """Export jackknife samples from the Obs
621
622        Returns
623        -------
624        numpy.ndarray
625            Returns a numpy array of length N + 1 where N is the number of samples
626            for the given ensemble and replicum. The zeroth entry of the array contains
627            the mean value of the Obs, entries 1 to N contain the N jackknife samples
628            derived from the Obs. The current implementation only works for observables
629            defined on exactly one ensemble and replicum. The derived jackknife samples
630            should agree with samples from a full jackknife analysis up to O(1/N).
631        """
632
633        if len(self.names) != 1:
634            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
635
636        name = self.names[0]
637        full_data = self.deltas[name] + self.r_values[name]
638        n = full_data.size
639        mean = self.value
640        tmp_jacks = np.zeros(n + 1)
641        tmp_jacks[0] = mean
642        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
643        return tmp_jacks
644
645    def __float__(self):
646        return float(self.value)
647
648    def __repr__(self):
649        return 'Obs[' + str(self) + ']'
650
651    def __str__(self):
652        if self._dvalue == 0.0:
653            return str(self.value)
654        fexp = np.floor(np.log10(self._dvalue))
655        if fexp < 0.0:
656            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
657        elif fexp == 0.0:
658            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
659        else:
660            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
661
662    # Overload comparisons
663    def __lt__(self, other):
664        return self.value < other
665
666    def __le__(self, other):
667        return self.value <= other
668
669    def __gt__(self, other):
670        return self.value > other
671
672    def __ge__(self, other):
673        return self.value >= other
674
675    def __eq__(self, other):
676        return (self - other).is_zero()
677
678    def __ne__(self, other):
679        return not (self - other).is_zero()
680
681    # Overload math operations
682    def __add__(self, y):
683        if isinstance(y, Obs):
684            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
685        else:
686            if isinstance(y, np.ndarray):
687                return np.array([self + o for o in y])
688            elif y.__class__.__name__ in ['Corr', 'CObs']:
689                return NotImplemented
690            else:
691                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
692
693    def __radd__(self, y):
694        return self + y
695
696    def __mul__(self, y):
697        if isinstance(y, Obs):
698            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
699        else:
700            if isinstance(y, np.ndarray):
701                return np.array([self * o for o in y])
702            elif isinstance(y, complex):
703                return CObs(self * y.real, self * y.imag)
704            elif y.__class__.__name__ in ['Corr', 'CObs']:
705                return NotImplemented
706            else:
707                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
708
709    def __rmul__(self, y):
710        return self * y
711
712    def __sub__(self, y):
713        if isinstance(y, Obs):
714            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
715        else:
716            if isinstance(y, np.ndarray):
717                return np.array([self - o for o in y])
718            elif y.__class__.__name__ in ['Corr', 'CObs']:
719                return NotImplemented
720            else:
721                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
722
723    def __rsub__(self, y):
724        return -1 * (self - y)
725
726    def __pos__(self):
727        return self
728
729    def __neg__(self):
730        return -1 * self
731
732    def __truediv__(self, y):
733        if isinstance(y, Obs):
734            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
735        else:
736            if isinstance(y, np.ndarray):
737                return np.array([self / o for o in y])
738            elif y.__class__.__name__ in ['Corr', 'CObs']:
739                return NotImplemented
740            else:
741                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
742
743    def __rtruediv__(self, y):
744        if isinstance(y, Obs):
745            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
746        else:
747            if isinstance(y, np.ndarray):
748                return np.array([o / self for o in y])
749            elif y.__class__.__name__ in ['Corr', 'CObs']:
750                return NotImplemented
751            else:
752                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
753
754    def __pow__(self, y):
755        if isinstance(y, Obs):
756            return derived_observable(lambda x: x[0] ** x[1], [self, y])
757        else:
758            return derived_observable(lambda x: x[0] ** y, [self])
759
760    def __rpow__(self, y):
761        if isinstance(y, Obs):
762            return derived_observable(lambda x: x[0] ** x[1], [y, self])
763        else:
764            return derived_observable(lambda x: y ** x[0], [self])
765
766    def __abs__(self):
767        return derived_observable(lambda x: anp.abs(x[0]), [self])
768
769    # Overload numpy functions
770    def sqrt(self):
771        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
772
773    def log(self):
774        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
775
776    def exp(self):
777        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
778
779    def sin(self):
780        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
781
782    def cos(self):
783        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
784
785    def tan(self):
786        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
787
788    def arcsin(self):
789        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
790
791    def arccos(self):
792        return derived_observable(lambda x: anp.arccos(x[0]), [self])
793
794    def arctan(self):
795        return derived_observable(lambda x: anp.arctan(x[0]), [self])
796
797    def sinh(self):
798        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
799
800    def cosh(self):
801        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
802
803    def tanh(self):
804        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
805
806    def arcsinh(self):
807        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
808
809    def arccosh(self):
810        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
811
812    def arctanh(self):
813        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
#   Obs(samples, names, idl=None, **kwargs)
View Source
 56    def __init__(self, samples, names, idl=None, **kwargs):
 57        """ Initialize Obs object.
 58
 59        Parameters
 60        ----------
 61        samples : list
 62            list of numpy arrays containing the Monte Carlo samples
 63        names : list
 64            list of strings labeling the individual samples
 65        idl : list, optional
 66            list of ranges or lists on which the samples are defined
 67        """
 68
 69        if kwargs.get("means") is None and len(samples):
 70            if len(samples) != len(names):
 71                raise Exception('Length of samples and names incompatible.')
 72            if idl is not None:
 73                if len(idl) != len(names):
 74                    raise Exception('Length of idl incompatible with samples and names.')
 75            name_length = len(names)
 76            if name_length > 1:
 77                if name_length != len(set(names)):
 78                    raise Exception('names are not unique.')
 79                if not all(isinstance(x, str) for x in names):
 80                    raise TypeError('All names have to be strings.')
 81            else:
 82                if not isinstance(names[0], str):
 83                    raise TypeError('All names have to be strings.')
 84            if min(len(x) for x in samples) <= 4:
 85                raise Exception('Samples have to have at least 5 entries.')
 86
 87        self.names = sorted(names)
 88        self.shape = {}
 89        self.r_values = {}
 90        self.deltas = {}
 91        self._covobs = {}
 92
 93        self._value = 0
 94        self.N = 0
 95        self.is_merged = {}
 96        self.idl = {}
 97        if idl is not None:
 98            for name, idx in sorted(zip(names, idl)):
 99                if isinstance(idx, range):
100                    self.idl[name] = idx
101                elif isinstance(idx, (list, np.ndarray)):
102                    dc = np.unique(np.diff(idx))
103                    if np.any(dc < 0):
104                        raise Exception("Unsorted idx for idl[%s]" % (name))
105                    if len(dc) == 1:
106                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
107                    else:
108                        self.idl[name] = list(idx)
109                else:
110                    raise Exception('incompatible type for idl[%s].' % (name))
111        else:
112            for name, sample in sorted(zip(names, samples)):
113                self.idl[name] = range(1, len(sample) + 1)
114
115        if kwargs.get("means") is not None:
116            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
117                self.shape[name] = len(self.idl[name])
118                self.N += self.shape[name]
119                self.r_values[name] = mean
120                self.deltas[name] = sample
121        else:
122            for name, sample in sorted(zip(names, samples)):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                if len(sample) != self.shape[name]:
126                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
127                self.r_values[name] = np.mean(sample)
128                self.deltas[name] = sample - self.r_values[name]
129                self._value += self.shape[name] * self.r_values[name]
130            self._value /= self.N
131
132        self._dvalue = 0.0
133        self.ddvalue = 0.0
134        self.reweighted = False
135
136        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
#   S_global = 2.0
#   S_dict = {}
#   tau_exp_global = 0.0
#   tau_exp_dict = {}
#   N_sigma_global = 1.0
#   N_sigma_dict = {}
#   filter_eps = 1e-10
#   names
#   shape
#   r_values
#   deltas
#   is_merged
#   idl
#   ddvalue
#   reweighted
#   tag
#   value
#   dvalue
#   e_names
#   cov_names
#   mc_names
#   e_content
#   covobs
#   def gamma_method(self, **kwargs):
View Source
171    def gamma_method(self, **kwargs):
172        """Estimate the error and related properties of the Obs.
173
174        Parameters
175        ----------
176        S : float
177            specifies a custom value for the parameter S (default 2.0).
178            If set to 0 it is assumed that the data exhibits no
179            autocorrelation. In this case the error estimates coincides
180            with the sample standard error.
181        tau_exp : float
182            positive value triggers the critical slowing down analysis
183            (default 0.0).
184        N_sigma : float
185            number of standard deviations from zero until the tail is
186            attached to the autocorrelation function (default 1).
187        fft : bool
188            determines whether the fft algorithm is used for the computation
189            of the autocorrelation function (default True)
190        """
191
192        e_content = self.e_content
193        self.e_dvalue = {}
194        self.e_ddvalue = {}
195        self.e_tauint = {}
196        self.e_dtauint = {}
197        self.e_windowsize = {}
198        self.e_n_tauint = {}
199        self.e_n_dtauint = {}
200        e_gamma = {}
201        self.e_rho = {}
202        self.e_drho = {}
203        self._dvalue = 0
204        self.ddvalue = 0
205
206        self.S = {}
207        self.tau_exp = {}
208        self.N_sigma = {}
209
210        if kwargs.get('fft') is False:
211            fft = False
212        else:
213            fft = True
214
215        def _parse_kwarg(kwarg_name):
216            if kwarg_name in kwargs:
217                tmp = kwargs.get(kwarg_name)
218                if isinstance(tmp, (int, float)):
219                    if tmp < 0:
220                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
221                    for e, e_name in enumerate(self.e_names):
222                        getattr(self, kwarg_name)[e_name] = tmp
223                else:
224                    raise TypeError(kwarg_name + ' is not in proper format.')
225            else:
226                for e, e_name in enumerate(self.e_names):
227                    if e_name in getattr(Obs, kwarg_name + '_dict'):
228                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
229                    else:
230                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
231
232        _parse_kwarg('S')
233        _parse_kwarg('tau_exp')
234        _parse_kwarg('N_sigma')
235
236        for e, e_name in enumerate(self.mc_names):
237            r_length = []
238            for r_name in e_content[e_name]:
239                if isinstance(self.idl[r_name], range):
240                    r_length.append(len(self.idl[r_name]))
241                else:
242                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
243
244            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
245            w_max = max(r_length) // 2
246            e_gamma[e_name] = np.zeros(w_max)
247            self.e_rho[e_name] = np.zeros(w_max)
248            self.e_drho[e_name] = np.zeros(w_max)
249
250            for r_name in e_content[e_name]:
251                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
252
253            gamma_div = np.zeros(w_max)
254            for r_name in e_content[e_name]:
255                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
256            gamma_div[gamma_div < 1] = 1.0
257            e_gamma[e_name] /= gamma_div[:w_max]
258
259            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
260                self.e_tauint[e_name] = 0.5
261                self.e_dtauint[e_name] = 0.0
262                self.e_dvalue[e_name] = 0.0
263                self.e_ddvalue[e_name] = 0.0
264                self.e_windowsize[e_name] = 0
265                continue
266
267            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
268            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
269            # Make sure no entry of tauint is smaller than 0.5
270            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
271            # hep-lat/0306017 eq. (42)
272            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
273            self.e_n_dtauint[e_name][0] = 0.0
274
275            def _compute_drho(i):
276                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
277                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
278
279            _compute_drho(1)
280            if self.tau_exp[e_name] > 0:
281                texp = self.tau_exp[e_name]
282                # Critical slowing down analysis
283                if w_max // 2 <= 1:
284                    raise Exception("Need at least 8 samples for tau_exp error analysis")
285                for n in range(1, w_max // 2):
286                    _compute_drho(n + 1)
287                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
288                        # Bias correction hep-lat/0306017 eq. (49) included
289                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
290                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
291                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
292                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
293                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
294                        self.e_windowsize[e_name] = n
295                        break
296            else:
297                if self.S[e_name] == 0.0:
298                    self.e_tauint[e_name] = 0.5
299                    self.e_dtauint[e_name] = 0.0
300                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
301                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
302                    self.e_windowsize[e_name] = 0
303                else:
304                    # Standard automatic windowing procedure
305                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
306                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
307                    for n in range(1, w_max):
308                        if n < w_max // 2 - 2:
309                            _compute_drho(n + 1)
310                        if g_w[n - 1] < 0 or n >= w_max - 1:
311                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
312                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
313                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
314                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
315                            self.e_windowsize[e_name] = n
316                            break
317
318            self._dvalue += self.e_dvalue[e_name] ** 2
319            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
320
321        for e_name in self.cov_names:
322            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
323            self.e_ddvalue[e_name] = 0
324            self._dvalue += self.e_dvalue[e_name]**2
325
326        self._dvalue = np.sqrt(self._dvalue)
327        if self._dvalue == 0.0:
328            self.ddvalue = 0.0
329        else:
330            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
331        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
#   def details(self, ens_content=True):
View Source
366    def details(self, ens_content=True):
367        """Output detailed properties of the Obs.
368
369        Parameters
370        ----------
371        ens_content : bool
372            print details about the ensembles and replica if true.
373        """
374        if self.tag is not None:
375            print("Description:", self.tag)
376        if not hasattr(self, 'e_dvalue'):
377            print('Result\t %3.8e' % (self.value))
378        else:
379            if self.value == 0.0:
380                percentage = np.nan
381            else:
382                percentage = np.abs(self._dvalue / self.value) * 100
383            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
384            if len(self.e_names) > 1:
385                print(' Ensemble errors:')
386            for e_name in self.mc_names:
387                if len(self.e_names) > 1:
388                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
389                if self.tau_exp[e_name] > 0:
390                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
391                else:
392                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
393            for e_name in self.cov_names:
394                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
395        if ens_content is True:
396            if len(self.e_names) == 1:
397                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
398            else:
399                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
400            my_string_list = []
401            for key, value in sorted(self.e_content.items()):
402                if key not in self.covobs:
403                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
404                    if len(value) == 1:
405                        my_string += f': {self.shape[value[0]]} configurations'
406                        if isinstance(self.idl[value[0]], range):
407                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
408                        else:
409                            my_string += ' (irregular range)'
410                    else:
411                        sublist = []
412                        for v in value:
413                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
414                            my_substring += f': {self.shape[v]} configurations'
415                            if isinstance(self.idl[v], range):
416                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
417                            else:
418                                my_substring += ' (irregular range)'
419                            sublist.append(my_substring)
420
421                        my_string += '\n' + '\n'.join(sublist)
422                else:
423                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
424                my_string_list.append(my_string)
425            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
#   def is_zero_within_error(self, sigma=1):
View Source
427    def is_zero_within_error(self, sigma=1):
428        """Checks whether the observable is zero within 'sigma' standard errors.
429
430        Parameters
431        ----------
432        sigma : int
433            Number of standard errors used for the check.
434
435        Works only properly when the gamma method was run.
436        """
437        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
#   def is_zero(self, atol=1e-10):
View Source
439    def is_zero(self, atol=1e-10):
440        """Checks whether the observable is zero within a given tolerance.
441
442        Parameters
443        ----------
444        atol : float
445            Absolute tolerance (for details see numpy documentation).
446        """
447        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
#   def plot_tauint(self, save=None):
View Source
449    def plot_tauint(self, save=None):
450        """Plot integrated autocorrelation time for each ensemble.
451
452        Parameters
453        ----------
454        save : str
455            saves the figure to a file named 'save' if.
456        """
457        if not hasattr(self, 'e_dvalue'):
458            raise Exception('Run the gamma method first.')
459
460        for e, e_name in enumerate(self.mc_names):
461            fig = plt.figure()
462            plt.xlabel(r'$W$')
463            plt.ylabel(r'$\tau_\mathrm{int}$')
464            length = int(len(self.e_n_tauint[e_name]))
465            if self.tau_exp[e_name] > 0:
466                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
467                x_help = np.arange(2 * self.tau_exp[e_name])
468                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
469                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
470                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
471                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
472                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
473                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
474                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
475            else:
476                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
477                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
478
479            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
480            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
481            plt.legend()
482            plt.xlim(-0.5, xmax)
483            ylim = plt.ylim()
484            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
485            plt.draw()
486            if save:
487                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
#   def plot_rho(self, save=None):
View Source
489    def plot_rho(self, save=None):
490        """Plot normalized autocorrelation function time for each ensemble.
491
492        Parameters
493        ----------
494        save : str
495            saves the figure to a file named 'save' if.
496        """
497        if not hasattr(self, 'e_dvalue'):
498            raise Exception('Run the gamma method first.')
499        for e, e_name in enumerate(self.mc_names):
500            fig = plt.figure()
501            plt.xlabel('W')
502            plt.ylabel('rho')
503            length = int(len(self.e_drho[e_name]))
504            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
505            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
506            if self.tau_exp[e_name] > 0:
507                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
508                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
509                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
510                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
511            else:
512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
513                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
514            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
515            plt.xlim(-0.5, xmax)
516            plt.draw()
517            if save:
518                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
#   def plot_rep_dist(self):
View Source
520    def plot_rep_dist(self):
521        """Plot replica distribution for each ensemble with more than one replicum."""
522        if not hasattr(self, 'e_dvalue'):
523            raise Exception('Run the gamma method first.')
524        for e, e_name in enumerate(self.mc_names):
525            if len(self.e_content[e_name]) == 1:
526                print('No replica distribution for a single replicum (', e_name, ')')
527                continue
528            r_length = []
529            sub_r_mean = 0
530            for r, r_name in enumerate(self.e_content[e_name]):
531                r_length.append(len(self.deltas[r_name]))
532                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
533            e_N = np.sum(r_length)
534            sub_r_mean /= e_N
535            arr = np.zeros(len(self.e_content[e_name]))
536            for r, r_name in enumerate(self.e_content[e_name]):
537                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
538            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
539            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
540            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

#   def plot_history(self, expand=True):
View Source
542    def plot_history(self, expand=True):
543        """Plot derived Monte Carlo history for each ensemble
544
545        Parameters
546        ----------
547        expand : bool
548            show expanded history for irregular Monte Carlo chains (default: True).
549        """
550        for e, e_name in enumerate(self.mc_names):
551            plt.figure()
552            r_length = []
553            tmp = []
554            tmp_expanded = []
555            for r, r_name in enumerate(self.e_content[e_name]):
556                tmp.append(self.deltas[r_name] + self.r_values[r_name])
557                if expand:
558                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
559                    r_length.append(len(tmp_expanded[-1]))
560                else:
561                    r_length.append(len(tmp[-1]))
562            e_N = np.sum(r_length)
563            x = np.arange(e_N)
564            y_test = np.concatenate(tmp, axis=0)
565            if expand:
566                y = np.concatenate(tmp_expanded, axis=0)
567            else:
568                y = y_test
569            plt.errorbar(x, y, fmt='.', markersize=3)
570            plt.xlim(-0.5, e_N - 0.5)
571            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
572            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
#   def plot_piechart(self):
View Source
574    def plot_piechart(self):
575        """Plot piechart which shows the fractional contribution of each
576        ensemble to the error and returns a dictionary containing the fractions."""
577        if not hasattr(self, 'e_dvalue'):
578            raise Exception('Run the gamma method first.')
579        if np.isclose(0.0, self._dvalue, atol=1e-15):
580            raise Exception('Error is 0.0')
581        labels = self.e_names
582        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
583        fig1, ax1 = plt.subplots()
584        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
585        ax1.axis('equal')
586        plt.draw()
587
588        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

#   def dump(self, filename, datatype='json.gz', description='', **kwargs):
View Source
590    def dump(self, filename, datatype="json.gz", description="", **kwargs):
591        """Dump the Obs to a file 'name' of chosen format.
592
593        Parameters
594        ----------
595        filename : str
596            name of the file to be saved.
597        datatype : str
598            Format of the exported file. Supported formats include
599            "json.gz" and "pickle"
600        description : str
601            Description for output file, only relevant for json.gz format.
602        path : str
603            specifies a custom path for the file (default '.')
604        """
605        if 'path' in kwargs:
606            file_name = kwargs.get('path') + '/' + filename
607        else:
608            file_name = filename
609
610        if datatype == "json.gz":
611            from .input.json import dump_to_json
612            dump_to_json([self], file_name, description=description)
613        elif datatype == "pickle":
614            with open(file_name + '.p', 'wb') as fb:
615                pickle.dump(self, fb)
616        else:
617            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
#   def export_jackknife(self):
View Source
619    def export_jackknife(self):
620        """Export jackknife samples from the Obs
621
622        Returns
623        -------
624        numpy.ndarray
625            Returns a numpy array of length N + 1 where N is the number of samples
626            for the given ensemble and replicum. The zeroth entry of the array contains
627            the mean value of the Obs, entries 1 to N contain the N jackknife samples
628            derived from the Obs. The current implementation only works for observables
629            defined on exactly one ensemble and replicum. The derived jackknife samples
630            should agree with samples from a full jackknife analysis up to O(1/N).
631        """
632
633        if len(self.names) != 1:
634            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
635
636        name = self.names[0]
637        full_data = self.deltas[name] + self.r_values[name]
638        n = full_data.size
639        mean = self.value
640        tmp_jacks = np.zeros(n + 1)
641        tmp_jacks[0] = mean
642        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
643        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
#   def sqrt(self):
View Source
770    def sqrt(self):
771        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
#   def log(self):
View Source
773    def log(self):
774        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
#   def exp(self):
View Source
776    def exp(self):
777        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
#   def sin(self):
View Source
779    def sin(self):
780        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
#   def cos(self):
View Source
782    def cos(self):
783        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
#   def tan(self):
View Source
785    def tan(self):
786        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
#   def arcsin(self):
View Source
788    def arcsin(self):
789        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
#   def arccos(self):
View Source
791    def arccos(self):
792        return derived_observable(lambda x: anp.arccos(x[0]), [self])
#   def arctan(self):
View Source
794    def arctan(self):
795        return derived_observable(lambda x: anp.arctan(x[0]), [self])
#   def sinh(self):
View Source
797    def sinh(self):
798        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
#   def cosh(self):
View Source
800    def cosh(self):
801        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
#   def tanh(self):
View Source
803    def tanh(self):
804        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
#   def arcsinh(self):
View Source
806    def arcsinh(self):
807        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
#   def arccosh(self):
View Source
809    def arccosh(self):
810        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
#   def arctanh(self):
View Source
812    def arctanh(self):
813        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
#   N_sigma
#   e_ddvalue
#   e_drho
#   e_dtauint
#   e_dvalue
#   e_n_dtauint
#   e_n_tauint
#   e_rho
#   e_tauint
#   e_windowsize
#   tau_exp
#   class CObs:
View Source
816class CObs:
817    """Class for a complex valued observable."""
818    __slots__ = ['_real', '_imag', 'tag']
819
820    def __init__(self, real, imag=0.0):
821        self._real = real
822        self._imag = imag
823        self.tag = None
824
825    @property
826    def real(self):
827        return self._real
828
829    @property
830    def imag(self):
831        return self._imag
832
833    def gamma_method(self, **kwargs):
834        """Executes the gamma_method for the real and the imaginary part."""
835        if isinstance(self.real, Obs):
836            self.real.gamma_method(**kwargs)
837        if isinstance(self.imag, Obs):
838            self.imag.gamma_method(**kwargs)
839
840    def is_zero(self):
841        """Checks whether both real and imaginary part are zero within machine precision."""
842        return self.real == 0.0 and self.imag == 0.0
843
844    def conjugate(self):
845        return CObs(self.real, -self.imag)
846
847    def __add__(self, other):
848        if isinstance(other, np.ndarray):
849            return other + self
850        elif hasattr(other, 'real') and hasattr(other, 'imag'):
851            return CObs(self.real + other.real,
852                        self.imag + other.imag)
853        else:
854            return CObs(self.real + other, self.imag)
855
856    def __radd__(self, y):
857        return self + y
858
859    def __sub__(self, other):
860        if isinstance(other, np.ndarray):
861            return -1 * (other - self)
862        elif hasattr(other, 'real') and hasattr(other, 'imag'):
863            return CObs(self.real - other.real, self.imag - other.imag)
864        else:
865            return CObs(self.real - other, self.imag)
866
867    def __rsub__(self, other):
868        return -1 * (self - other)
869
870    def __mul__(self, other):
871        if isinstance(other, np.ndarray):
872            return other * self
873        elif hasattr(other, 'real') and hasattr(other, 'imag'):
874            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
875                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
876                                               [self.real, other.real, self.imag, other.imag],
877                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
878                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
879                                               [self.real, other.real, self.imag, other.imag],
880                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
881            elif getattr(other, 'imag', 0) != 0:
882                return CObs(self.real * other.real - self.imag * other.imag,
883                            self.imag * other.real + self.real * other.imag)
884            else:
885                return CObs(self.real * other.real, self.imag * other.real)
886        else:
887            return CObs(self.real * other, self.imag * other)
888
889    def __rmul__(self, other):
890        return self * other
891
892    def __truediv__(self, other):
893        if isinstance(other, np.ndarray):
894            return 1 / (other / self)
895        elif hasattr(other, 'real') and hasattr(other, 'imag'):
896            r = other.real ** 2 + other.imag ** 2
897            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
898        else:
899            return CObs(self.real / other, self.imag / other)
900
901    def __rtruediv__(self, other):
902        r = self.real ** 2 + self.imag ** 2
903        if hasattr(other, 'real') and hasattr(other, 'imag'):
904            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
905        else:
906            return CObs(self.real * other / r, -self.imag * other / r)
907
908    def __abs__(self):
909        return np.sqrt(self.real**2 + self.imag**2)
910
911    def __pos__(self):
912        return self
913
914    def __neg__(self):
915        return -1 * self
916
917    def __eq__(self, other):
918        return self.real == other.real and self.imag == other.imag
919
920    def __str__(self):
921        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
922
923    def __repr__(self):
924        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

#   CObs(real, imag=0.0)
View Source
820    def __init__(self, real, imag=0.0):
821        self._real = real
822        self._imag = imag
823        self.tag = None
#   tag
#   real
#   imag
#   def gamma_method(self, **kwargs):
View Source
833    def gamma_method(self, **kwargs):
834        """Executes the gamma_method for the real and the imaginary part."""
835        if isinstance(self.real, Obs):
836            self.real.gamma_method(**kwargs)
837        if isinstance(self.imag, Obs):
838            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

#   def is_zero(self):
View Source
840    def is_zero(self):
841        """Checks whether both real and imaginary part are zero within machine precision."""
842        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

#   def conjugate(self):
View Source
844    def conjugate(self):
845        return CObs(self.real, -self.imag)
#   def derived_observable(func, data, array_mode=False, **kwargs):
View Source
1032def derived_observable(func, data, array_mode=False, **kwargs):
1033    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1034
1035    Parameters
1036    ----------
1037    func : object
1038        arbitrary function of the form func(data, **kwargs). For the
1039        automatic differentiation to work, all numpy functions have to have
1040        the autograd wrapper (use 'import autograd.numpy as anp').
1041    data : list
1042        list of Obs, e.g. [obs1, obs2, obs3].
1043    num_grad : bool
1044        if True, numerical derivatives are used instead of autograd
1045        (default False). To control the numerical differentiation the
1046        kwargs of numdifftools.step_generators.MaxStepGenerator
1047        can be used.
1048    man_grad : list
1049        manually supply a list or an array which contains the jacobian
1050        of func. Use cautiously, supplying the wrong derivative will
1051        not be intercepted.
1052
1053    Notes
1054    -----
1055    For simple mathematical operations it can be practical to use anonymous
1056    functions. For the ratio of two observables one can e.g. use
1057
1058    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1059    """
1060
1061    data = np.asarray(data)
1062    raveled_data = data.ravel()
1063
1064    # Workaround for matrix operations containing non Obs data
1065    if not all(isinstance(x, Obs) for x in raveled_data):
1066        for i in range(len(raveled_data)):
1067            if isinstance(raveled_data[i], (int, float)):
1068                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1069
1070    allcov = {}
1071    for o in raveled_data:
1072        for name in o.cov_names:
1073            if name in allcov:
1074                if not np.allclose(allcov[name], o.covobs[name].cov):
1075                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1076            else:
1077                allcov[name] = o.covobs[name].cov
1078
1079    n_obs = len(raveled_data)
1080    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1081    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1082    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1083
1084    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1085    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1086
1087    if data.ndim == 1:
1088        values = np.array([o.value for o in data])
1089    else:
1090        values = np.vectorize(lambda x: x.value)(data)
1091
1092    new_values = func(values, **kwargs)
1093
1094    multi = int(isinstance(new_values, np.ndarray))
1095
1096    new_r_values = {}
1097    new_idl_d = {}
1098    for name in new_sample_names:
1099        idl = []
1100        tmp_values = np.zeros(n_obs)
1101        for i, item in enumerate(raveled_data):
1102            tmp_values[i] = item.r_values.get(name, item.value)
1103            tmp_idl = item.idl.get(name)
1104            if tmp_idl is not None:
1105                idl.append(tmp_idl)
1106        if multi > 0:
1107            tmp_values = np.array(tmp_values).reshape(data.shape)
1108        new_r_values[name] = func(tmp_values, **kwargs)
1109        new_idl_d[name] = _merge_idx(idl)
1110        if not is_merged[name]:
1111            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1112
1113    if 'man_grad' in kwargs:
1114        deriv = np.asarray(kwargs.get('man_grad'))
1115        if new_values.shape + data.shape != deriv.shape:
1116            raise Exception('Manual derivative does not have correct shape.')
1117    elif kwargs.get('num_grad') is True:
1118        if multi > 0:
1119            raise Exception('Multi mode currently not supported for numerical derivative')
1120        options = {
1121            'base_step': 0.1,
1122            'step_ratio': 2.5}
1123        for key in options.keys():
1124            kwarg = kwargs.get(key)
1125            if kwarg is not None:
1126                options[key] = kwarg
1127        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1128        if tmp_df.size == 1:
1129            deriv = np.array([tmp_df.real])
1130        else:
1131            deriv = tmp_df.real
1132    else:
1133        deriv = jacobian(func)(values, **kwargs)
1134
1135    final_result = np.zeros(new_values.shape, dtype=object)
1136
1137    if array_mode is True:
1138
1139        class _Zero_grad():
1140            def __init__(self, N):
1141                self.grad = np.zeros((N, 1))
1142
1143        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1144        d_extracted = {}
1145        g_extracted = {}
1146        for name in new_sample_names:
1147            d_extracted[name] = []
1148            ens_length = len(new_idl_d[name])
1149            for i_dat, dat in enumerate(data):
1150                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1151        for name in new_cov_names:
1152            g_extracted[name] = []
1153            zero_grad = _Zero_grad(new_covobs_lengths[name])
1154            for i_dat, dat in enumerate(data):
1155                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1156
1157    for i_val, new_val in np.ndenumerate(new_values):
1158        new_deltas = {}
1159        new_grad = {}
1160        if array_mode is True:
1161            for name in new_sample_names:
1162                ens_length = d_extracted[name][0].shape[-1]
1163                new_deltas[name] = np.zeros(ens_length)
1164                for i_dat, dat in enumerate(d_extracted[name]):
1165                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1166            for name in new_cov_names:
1167                new_grad[name] = 0
1168                for i_dat, dat in enumerate(g_extracted[name]):
1169                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1170        else:
1171            for j_obs, obs in np.ndenumerate(data):
1172                for name in obs.names:
1173                    if name in obs.cov_names:
1174                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1175                    else:
1176                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1177
1178        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1179
1180        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1181            raise Exception('The same name has been used for deltas and covobs!')
1182        new_samples = []
1183        new_means = []
1184        new_idl = []
1185        new_names_obs = []
1186        for name in new_names:
1187            if name not in new_covobs:
1188                if is_merged[name]:
1189                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1190                else:
1191                    filtered_deltas = new_deltas[name]
1192                    filtered_idl_d = new_idl_d[name]
1193
1194                new_samples.append(filtered_deltas)
1195                new_idl.append(filtered_idl_d)
1196                new_means.append(new_r_values[name][i_val])
1197                new_names_obs.append(name)
1198        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1199        for name in new_covobs:
1200            final_result[i_val].names.append(name)
1201        final_result[i_val]._covobs = new_covobs
1202        final_result[i_val]._value = new_val
1203        final_result[i_val].is_merged = is_merged
1204        final_result[i_val].reweighted = reweighted
1205
1206    if multi == 0:
1207        final_result = final_result.item()
1208
1209    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

#   def reweight(weight, obs, **kwargs):
View Source
1249def reweight(weight, obs, **kwargs):
1250    """Reweight a list of observables.
1251
1252    Parameters
1253    ----------
1254    weight : Obs
1255        Reweighting factor. An Observable that has to be defined on a superset of the
1256        configurations in obs[i].idl for all i.
1257    obs : list
1258        list of Obs, e.g. [obs1, obs2, obs3].
1259    all_configs : bool
1260        if True, the reweighted observables are normalized by the average of
1261        the reweighting factor on all configurations in weight.idl and not
1262        on the configurations in obs[i].idl.
1263    """
1264    result = []
1265    for i in range(len(obs)):
1266        if len(obs[i].cov_names):
1267            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1268        if not set(obs[i].names).issubset(weight.names):
1269            raise Exception('Error: Ensembles do not fit')
1270        for name in obs[i].names:
1271            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1272                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1273        new_samples = []
1274        w_deltas = {}
1275        for name in sorted(obs[i].names):
1276            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1277            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1278        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1279
1280        if kwargs.get('all_configs'):
1281            new_weight = weight
1282        else:
1283            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1284
1285        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
1286        result[-1].reweighted = True
1287        result[-1].is_merged = obs[i].is_merged
1288
1289    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
#   def correlate(obs_a, obs_b):
View Source
1292def correlate(obs_a, obs_b):
1293    """Correlate two observables.
1294
1295    Parameters
1296    ----------
1297    obs_a : Obs
1298        First observable
1299    obs_b : Obs
1300        Second observable
1301
1302    Notes
1303    -----
1304    Keep in mind to only correlate primary observables which have not been reweighted
1305    yet. The reweighting has to be applied after correlating the observables.
1306    Currently only works if ensembles are identical (this is not strictly necessary).
1307    """
1308
1309    if sorted(obs_a.names) != sorted(obs_b.names):
1310        raise Exception('Ensembles do not fit')
1311    if len(obs_a.cov_names) or len(obs_b.cov_names):
1312        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1313    for name in obs_a.names:
1314        if obs_a.shape[name] != obs_b.shape[name]:
1315            raise Exception('Shapes of ensemble', name, 'do not fit')
1316        if obs_a.idl[name] != obs_b.idl[name]:
1317            raise Exception('idl of ensemble', name, 'do not fit')
1318
1319    if obs_a.reweighted is True:
1320        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1321    if obs_b.reweighted is True:
1322        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1323
1324    new_samples = []
1325    new_idl = []
1326    for name in sorted(obs_a.names):
1327        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1328        new_idl.append(obs_a.idl[name])
1329
1330    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1331    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1332    o.reweighted = obs_a.reweighted or obs_b.reweighted
1333    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

#   def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
View Source
1336def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1337    r'''Calculates the covariance matrix of a set of observables.
1338
1339    The gamma method has to be applied first to all observables.
1340
1341    Parameters
1342    ----------
1343    obs : list or numpy.ndarray
1344        List or one dimensional array of Obs
1345    visualize : bool
1346        If True plots the corresponding normalized correlation matrix (default False).
1347    correlation : bool
1348        If True the correlation instead of the covariance is returned (default False).
1349    smooth : None or int
1350        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1351        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1352        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1353        small ones.
1354
1355    Notes
1356    -----
1357    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1358    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1359    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1360    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1361    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1362    '''
1363
1364    length = len(obs)
1365
1366    max_samples = np.max([o.N for o in obs])
1367    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1368        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1369
1370    cov = np.zeros((length, length))
1371    for i in range(length):
1372        for j in range(i, length):
1373            cov[i, j] = _covariance_element(obs[i], obs[j])
1374    cov = cov + cov.T - np.diag(np.diag(cov))
1375
1376    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1377
1378    if isinstance(smooth, int):
1379        corr = _smooth_eigenvalues(corr, smooth)
1380
1381    errors = [o.dvalue for o in obs]
1382    cov = np.diag(errors) @ corr @ np.diag(errors)
1383
1384    eigenvalues = np.linalg.eigh(cov)[0]
1385    if not np.all(eigenvalues >= 0):
1386        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1387
1388    if visualize:
1389        plt.matshow(corr, vmin=-1, vmax=1)
1390        plt.set_cmap('RdBu')
1391        plt.colorbar()
1392        plt.draw()
1393
1394    if correlation is True:
1395        return corr
1396    else:
1397        return cov

Calculates the covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation instead of the covariance is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

#   def import_jackknife(jacks, name, idl=None):
View Source
1476def import_jackknife(jacks, name, idl=None):
1477    """Imports jackknife samples and returns an Obs
1478
1479    Parameters
1480    ----------
1481    jacks : numpy.ndarray
1482        numpy array containing the mean value as zeroth entry and
1483        the N jackknife samples as first to Nth entry.
1484    name : str
1485        name of the ensemble the samples are defined on.
1486    """
1487    length = len(jacks) - 1
1488    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1489    samples = jacks[1:] @ prj
1490    mean = np.mean(samples)
1491    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1492    new_obs._value = jacks[0]
1493    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
#   def merge_obs(list_of_obs):
View Source
1496def merge_obs(list_of_obs):
1497    """Combine all observables in list_of_obs into one new observable
1498
1499    Parameters
1500    ----------
1501    list_of_obs : list
1502        list of the Obs object to be combined
1503
1504    Notes
1505    -----
1506    It is not possible to combine obs which are based on the same replicum
1507    """
1508    replist = [item for obs in list_of_obs for item in obs.names]
1509    if (len(replist) == len(set(replist))) is False:
1510        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1511    if any([len(o.cov_names) for o in list_of_obs]):
1512        raise Exception('Not possible to merge data that contains covobs!')
1513    new_dict = {}
1514    idl_dict = {}
1515    for o in list_of_obs:
1516        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1517                        for key in set(o.deltas) | set(o.r_values)})
1518        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1519
1520    names = sorted(new_dict.keys())
1521    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1522    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1523    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1524    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

#   def cov_Obs(means, cov, name, grad=None):
View Source
1527def cov_Obs(means, cov, name, grad=None):
1528    """Create an Obs based on mean(s) and a covariance matrix
1529
1530    Parameters
1531    ----------
1532    mean : list of floats or float
1533        N mean value(s) of the new Obs
1534    cov : list or array
1535        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1536    name : str
1537        identifier for the covariance matrix
1538    grad : list or array
1539        Gradient of the Covobs wrt. the means belonging to cov.
1540    """
1541
1542    def covobs_to_obs(co):
1543        """Make an Obs out of a Covobs
1544
1545        Parameters
1546        ----------
1547        co : Covobs
1548            Covobs to be embedded into the Obs
1549        """
1550        o = Obs([], [], means=[])
1551        o._value = co.value
1552        o.names.append(co.name)
1553        o._covobs[co.name] = co
1554        o._dvalue = np.sqrt(co.errsq())
1555        return o
1556
1557    ol = []
1558    if isinstance(means, (float, int)):
1559        means = [means]
1560
1561    for i in range(len(means)):
1562        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1563    if ol[0].covobs[name].N != len(means):
1564        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1565    if len(ol) == 1:
1566        return ol[0]
1567    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.