pyerrors.obs

View Source
   0import warnings
   1import pickle
   2import numpy as np
   3import autograd.numpy as anp  # Thinly-wrapped numpy
   4from autograd import jacobian
   5import matplotlib.pyplot as plt
   6from scipy.stats import skew, skewtest, kurtosis, kurtosistest
   7import numdifftools as nd
   8from itertools import groupby
   9from .covobs import Covobs
  10
  11
  12class Obs:
  13    """Class for a general observable.
  14
  15    Instances of Obs are the basic objects of a pyerrors error analysis.
  16    They are initialized with a list which contains arrays of samples for
  17    different ensembles/replica and another list of same length which contains
  18    the names of the ensembles/replica. Mathematical operations can be
  19    performed on instances. The result is another instance of Obs. The error of
  20    an instance can be computed with the gamma_method. Also contains additional
  21    methods for output and visualization of the error calculation.
  22
  23    Attributes
  24    ----------
  25    S_global : float
  26        Standard value for S (default 2.0)
  27    S_dict : dict
  28        Dictionary for S values. If an entry for a given ensemble
  29        exists this overwrites the standard value for that ensemble.
  30    tau_exp_global : float
  31        Standard value for tau_exp (default 0.0)
  32    tau_exp_dict : dict
  33        Dictionary for tau_exp values. If an entry for a given ensemble exists
  34        this overwrites the standard value for that ensemble.
  35    N_sigma_global : float
  36        Standard value for N_sigma (default 1.0)
  37    N_sigma_dict : dict
  38        Dictionary for N_sigma values. If an entry for a given ensemble exists
  39        this overwrites the standard value for that ensemble.
  40    """
    # Fixed set of instance attributes. '__dict__' is listed as well, so
    # instances still carry a per-instance dict and attributes outside this
    # list can be assigned.
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']

    # Class-level defaults read by gamma_method. The *_dict entries are keyed
    # by ensemble name and take precedence over the matching *_global value.
    # They are class attributes and therefore shared by all Obs instances.
    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
    # NOTE(review): not used in the visible part of the file; presumably a
    # threshold for filtering negligible contributions - confirm at the use site.
    filter_eps = 1e-10
  54
  55    def __init__(self, samples, names, idl=None, **kwargs):
  56        """ Initialize Obs object.
  57
  58        Parameters
  59        ----------
  60        samples : list
  61            list of numpy arrays containing the Monte Carlo samples
  62        names : list
  63            list of strings labeling the individual samples
  64        idl : list, optional
  65            list of ranges or lists on which the samples are defined
  66        """
  67
  68        if kwargs.get("means") is None and len(samples):
  69            if len(samples) != len(names):
  70                raise Exception('Length of samples and names incompatible.')
  71            if idl is not None:
  72                if len(idl) != len(names):
  73                    raise Exception('Length of idl incompatible with samples and names.')
  74            name_length = len(names)
  75            if name_length > 1:
  76                if name_length != len(set(names)):
  77                    raise Exception('names are not unique.')
  78                if not all(isinstance(x, str) for x in names):
  79                    raise TypeError('All names have to be strings.')
  80            else:
  81                if not isinstance(names[0], str):
  82                    raise TypeError('All names have to be strings.')
  83            if min(len(x) for x in samples) <= 4:
  84                raise Exception('Samples have to have at least 5 entries.')
  85
  86        self.names = sorted(names)
  87        self.shape = {}
  88        self.r_values = {}
  89        self.deltas = {}
  90        self._covobs = {}
  91
  92        self._value = 0
  93        self.N = 0
  94        self.is_merged = {}
  95        self.idl = {}
  96        if idl is not None:
  97            for name, idx in sorted(zip(names, idl)):
  98                if isinstance(idx, range):
  99                    self.idl[name] = idx
 100                elif isinstance(idx, (list, np.ndarray)):
 101                    dc = np.unique(np.diff(idx))
 102                    if np.any(dc < 0):
 103                        raise Exception("Unsorted idx for idl[%s]" % (name))
 104                    if len(dc) == 1:
 105                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 106                    else:
 107                        self.idl[name] = list(idx)
 108                else:
 109                    raise Exception('incompatible type for idl[%s].' % (name))
 110        else:
 111            for name, sample in sorted(zip(names, samples)):
 112                self.idl[name] = range(1, len(sample) + 1)
 113
 114        if kwargs.get("means") is not None:
 115            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 116                self.shape[name] = len(self.idl[name])
 117                self.N += self.shape[name]
 118                self.r_values[name] = mean
 119                self.deltas[name] = sample
 120        else:
 121            for name, sample in sorted(zip(names, samples)):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                if len(sample) != self.shape[name]:
 125                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 126                self.r_values[name] = np.mean(sample)
 127                self.deltas[name] = sample - self.r_values[name]
 128                self._value += self.shape[name] * self.r_values[name]
 129            self._value /= self.N
 130
 131        self._dvalue = 0.0
 132        self.ddvalue = 0.0
 133        self.reweighted = False
 134
 135        self.tag = None
 136
 137    @property
 138    def value(self):
 139        return self._value
 140
 141    @property
 142    def dvalue(self):
 143        return self._dvalue
 144
 145    @property
 146    def e_names(self):
 147        return sorted(set([o.split('|')[0] for o in self.names]))
 148
 149    @property
 150    def cov_names(self):
 151        return sorted(set([o for o in self.covobs.keys()]))
 152
 153    @property
 154    def mc_names(self):
 155        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 156
 157    @property
 158    def e_content(self):
 159        res = {}
 160        for e, e_name in enumerate(self.e_names):
 161            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 162            if e_name in self.names:
 163                res[e_name].append(e_name)
 164        return res
 165
 166    @property
 167    def covobs(self):
 168        return self._covobs
 169
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: per-ensemble quantities in
        the dictionaries e_dvalue, e_ddvalue, e_tauint, e_dtauint,
        e_windowsize, e_rho, e_drho, e_n_tauint and e_n_dtauint, the
        parameters used in S, tau_exp and N_sigma, and the combined error
        and its error in _dvalue and ddvalue. Nothing is returned.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, or if tau_exp > 0 and the
            summation window is too short for the tau_exp analysis.
        TypeError
            If S, tau_exp or N_sigma is passed but is not an int or float.
        """

        e_content = self.e_content
        # Reset all results of a previous run.
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False disables the fft.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # An explicitly passed value is used for every ensemble. Otherwise
            # the class-level <name>_dict entry of the ensemble is used, with
            # <name>_global as fallback.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of each replicum: for irregular lists the distance between
            # first and last configuration is used, not the number of entries.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # The summation window is at most half the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the autocorrelation function over all replica of the ensemble.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same routine applied to arrays of ones.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            # Entries below 1 are set to 1 before dividing.
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Normalized autocorrelation function and its running sum.
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Fills self.e_drho[e_name][i] from the current e_rho.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Stop once rho is compatible with zero within N_sigma
                    # errors, or when the end of the scanned range is reached.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int is fixed to 0.5.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        # The window is the first n with negative g_w, or the
                        # last available n if g_w never turns negative.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Ensemble contributions are added in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their squared error; their error of the error is 0.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
 331
 332    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 333        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 334           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 335
 336        Parameters
 337        ----------
 338        deltas : list
 339            List of fluctuations
 340        idx : list
 341            List or range of configurations on which the deltas are defined.
 342        shape : int
 343            Number of configurations in idx.
 344        w_max : int
 345            Upper bound for the summation window.
 346        fft : bool
 347            determines whether the fft algorithm is used for the computation
 348            of the autocorrelation function.
 349        """
 350        gamma = np.zeros(w_max)
 351        deltas = _expand_deltas(deltas, idx, shape)
 352        new_shape = len(deltas)
 353        if fft:
 354            max_gamma = min(new_shape, w_max)
 355            # The padding for the fft has to be even
 356            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 357            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 358        else:
 359            for n in range(w_max):
 360                if new_shape - n >= 0:
 361                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 362
 363        return gamma
 364
 365    def details(self, ens_content=True):
 366        """Output detailed properties of the Obs.
 367
 368        Parameters
 369        ----------
 370        ens_content : bool
 371            print details about the ensembles and replica if true.
 372        """
 373        if self.tag is not None:
 374            print("Description:", self.tag)
 375        if not hasattr(self, 'e_dvalue'):
 376            print('Result\t %3.8e' % (self.value))
 377        else:
 378            if self.value == 0.0:
 379                percentage = np.nan
 380            else:
 381                percentage = np.abs(self._dvalue / self.value) * 100
 382            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 383            if len(self.e_names) > 1:
 384                print(' Ensemble errors:')
 385            for e_name in self.mc_names:
 386                if len(self.e_names) > 1:
 387                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 388                if self.tau_exp[e_name] > 0:
 389                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
 390                else:
 391                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
 392            for e_name in self.cov_names:
 393                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 394        if ens_content is True:
 395            if len(self.e_names) == 1:
 396                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 397            else:
 398                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 399            my_string_list = []
 400            for key, value in sorted(self.e_content.items()):
 401                if key not in self.covobs:
 402                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 403                    if len(value) == 1:
 404                        my_string += f': {self.shape[value[0]]} configurations'
 405                        if isinstance(self.idl[value[0]], range):
 406                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 407                        else:
 408                            my_string += ' (irregular range)'
 409                    else:
 410                        sublist = []
 411                        for v in value:
 412                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 413                            my_substring += f': {self.shape[v]} configurations'
 414                            if isinstance(self.idl[v], range):
 415                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 416                            else:
 417                                my_substring += ' (irregular range)'
 418                            sublist.append(my_substring)
 419
 420                        my_string += '\n' + '\n'.join(sublist)
 421                else:
 422                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 423                my_string_list.append(my_string)
 424            print('\n'.join(my_string_list))
 425
 426    def is_zero_within_error(self, sigma=1):
 427        """Checks whether the observable is zero within 'sigma' standard errors.
 428
 429        Parameters
 430        ----------
 431        sigma : int
 432            Number of standard errors used for the check.
 433
 434        Works only properly when the gamma method was run.
 435        """
 436        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 437
 438    def is_zero(self, atol=1e-10):
 439        """Checks whether the observable is zero within a given tolerance.
 440
 441        Parameters
 442        ----------
 443        atol : float
 444            Absolute tolerance (for details see numpy documentation).
 445        """
 446        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 447
 448    def plot_tauint(self, save=None):
 449        """Plot integrated autocorrelation time for each ensemble.
 450
 451        Parameters
 452        ----------
 453        save : str
 454            saves the figure to a file named 'save' if.
 455        """
 456        if not hasattr(self, 'e_dvalue'):
 457            raise Exception('Run the gamma method first.')
 458
 459        for e, e_name in enumerate(self.mc_names):
 460            fig = plt.figure()
 461            plt.xlabel(r'$W$')
 462            plt.ylabel(r'$\tau_\mathrm{int}$')
 463            length = int(len(self.e_n_tauint[e_name]))
 464            if self.tau_exp[e_name] > 0:
 465                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 466                x_help = np.arange(2 * self.tau_exp[e_name])
 467                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 468                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 469                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 470                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 471                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 472                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 473                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 474            else:
 475                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 476                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 477
 478            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 479            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 480            plt.legend()
 481            plt.xlim(-0.5, xmax)
 482            ylim = plt.ylim()
 483            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 484            plt.draw()
 485            if save:
 486                fig.savefig(save + "_" + str(e))
 487
 488    def plot_rho(self, save=None):
 489        """Plot normalized autocorrelation function time for each ensemble.
 490
 491        Parameters
 492        ----------
 493        save : str
 494            saves the figure to a file named 'save' if.
 495        """
 496        if not hasattr(self, 'e_dvalue'):
 497            raise Exception('Run the gamma method first.')
 498        for e, e_name in enumerate(self.mc_names):
 499            fig = plt.figure()
 500            plt.xlabel('W')
 501            plt.ylabel('rho')
 502            length = int(len(self.e_drho[e_name]))
 503            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 504            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 505            if self.tau_exp[e_name] > 0:
 506                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 507                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 509                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 510            else:
 511                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 512                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 513            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 514            plt.xlim(-0.5, xmax)
 515            plt.draw()
 516            if save:
 517                fig.savefig(save + "_" + str(e))
 518
 519    def plot_rep_dist(self):
 520        """Plot replica distribution for each ensemble with more than one replicum."""
 521        if not hasattr(self, 'e_dvalue'):
 522            raise Exception('Run the gamma method first.')
 523        for e, e_name in enumerate(self.mc_names):
 524            if len(self.e_content[e_name]) == 1:
 525                print('No replica distribution for a single replicum (', e_name, ')')
 526                continue
 527            r_length = []
 528            sub_r_mean = 0
 529            for r, r_name in enumerate(self.e_content[e_name]):
 530                r_length.append(len(self.deltas[r_name]))
 531                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 532            e_N = np.sum(r_length)
 533            sub_r_mean /= e_N
 534            arr = np.zeros(len(self.e_content[e_name]))
 535            for r, r_name in enumerate(self.e_content[e_name]):
 536                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 537            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 538            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 539            plt.draw()
 540
 541    def plot_history(self, expand=True):
 542        """Plot derived Monte Carlo history for each ensemble
 543
 544        Parameters
 545        ----------
 546        expand : bool
 547            show expanded history for irregular Monte Carlo chains (default: True).
 548        """
 549        for e, e_name in enumerate(self.mc_names):
 550            plt.figure()
 551            r_length = []
 552            tmp = []
 553            tmp_expanded = []
 554            for r, r_name in enumerate(self.e_content[e_name]):
 555                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 556                if expand:
 557                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 558                    r_length.append(len(tmp_expanded[-1]))
 559                else:
 560                    r_length.append(len(tmp[-1]))
 561            e_N = np.sum(r_length)
 562            x = np.arange(e_N)
 563            y_test = np.concatenate(tmp, axis=0)
 564            if expand:
 565                y = np.concatenate(tmp_expanded, axis=0)
 566            else:
 567                y = y_test
 568            plt.errorbar(x, y, fmt='.', markersize=3)
 569            plt.xlim(-0.5, e_N - 0.5)
 570            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 571            plt.draw()
 572
 573    def plot_piechart(self, save=None):
 574        """Plot piechart which shows the fractional contribution of each
 575        ensemble to the error and returns a dictionary containing the fractions.
 576
 577        Parameters
 578        ----------
 579        save : str
 580            saves the figure to a file named 'save' if.
 581        """
 582        if not hasattr(self, 'e_dvalue'):
 583            raise Exception('Run the gamma method first.')
 584        if np.isclose(0.0, self._dvalue, atol=1e-15):
 585            raise Exception('Error is 0.0')
 586        labels = self.e_names
 587        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 588        fig1, ax1 = plt.subplots()
 589        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 590        ax1.axis('equal')
 591        plt.draw()
 592        if save:
 593            fig1.savefig(save)
 594
 595        return dict(zip(self.e_names, sizes))
 596
 597    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 598        """Dump the Obs to a file 'name' of chosen format.
 599
 600        Parameters
 601        ----------
 602        filename : str
 603            name of the file to be saved.
 604        datatype : str
 605            Format of the exported file. Supported formats include
 606            "json.gz" and "pickle"
 607        description : str
 608            Description for output file, only relevant for json.gz format.
 609        path : str
 610            specifies a custom path for the file (default '.')
 611        """
 612        if 'path' in kwargs:
 613            file_name = kwargs.get('path') + '/' + filename
 614        else:
 615            file_name = filename
 616
 617        if datatype == "json.gz":
 618            from .input.json import dump_to_json
 619            dump_to_json([self], file_name, description=description)
 620        elif datatype == "pickle":
 621            with open(file_name + '.p', 'wb') as fb:
 622                pickle.dump(self, fb)
 623        else:
 624            raise Exception("Unknown datatype " + str(datatype))
 625
 626    def export_jackknife(self):
 627        """Export jackknife samples from the Obs
 628
 629        Returns
 630        -------
 631        numpy.ndarray
 632            Returns a numpy array of length N + 1 where N is the number of samples
 633            for the given ensemble and replicum. The zeroth entry of the array contains
 634            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 635            derived from the Obs. The current implementation only works for observables
 636            defined on exactly one ensemble and replicum. The derived jackknife samples
 637            should agree with samples from a full jackknife analysis up to O(1/N).
 638        """
 639
 640        if len(self.names) != 1:
 641            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 642
 643        name = self.names[0]
 644        full_data = self.deltas[name] + self.r_values[name]
 645        n = full_data.size
 646        mean = self.value
 647        tmp_jacks = np.zeros(n + 1)
 648        tmp_jacks[0] = mean
 649        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 650        return tmp_jacks
 651
 652    def __float__(self):
 653        return float(self.value)
 654
 655    def __repr__(self):
 656        return 'Obs[' + str(self) + ']'
 657
 658    def __str__(self):
 659        if self._dvalue == 0.0:
 660            return str(self.value)
 661        fexp = np.floor(np.log10(self._dvalue))
 662        if fexp < 0.0:
 663            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 664        elif fexp == 0.0:
 665            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
 666        else:
 667            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 668
 669    # Overload comparisons
 670    def __lt__(self, other):
 671        return self.value < other
 672
 673    def __le__(self, other):
 674        return self.value <= other
 675
 676    def __gt__(self, other):
 677        return self.value > other
 678
 679    def __ge__(self, other):
 680        return self.value >= other
 681
 682    def __eq__(self, other):
 683        return (self - other).is_zero()
 684
 685    def __ne__(self, other):
 686        return not (self - other).is_zero()
 687
 688    # Overload math operations
 689    def __add__(self, y):
 690        if isinstance(y, Obs):
 691            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 692        else:
 693            if isinstance(y, np.ndarray):
 694                return np.array([self + o for o in y])
 695            elif y.__class__.__name__ in ['Corr', 'CObs']:
 696                return NotImplemented
 697            else:
 698                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 699
 700    def __radd__(self, y):
 701        return self + y
 702
 703    def __mul__(self, y):
 704        if isinstance(y, Obs):
 705            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 706        else:
 707            if isinstance(y, np.ndarray):
 708                return np.array([self * o for o in y])
 709            elif isinstance(y, complex):
 710                return CObs(self * y.real, self * y.imag)
 711            elif y.__class__.__name__ in ['Corr', 'CObs']:
 712                return NotImplemented
 713            else:
 714                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 715
 716    def __rmul__(self, y):
 717        return self * y
 718
 719    def __sub__(self, y):
 720        if isinstance(y, Obs):
 721            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 722        else:
 723            if isinstance(y, np.ndarray):
 724                return np.array([self - o for o in y])
 725            elif y.__class__.__name__ in ['Corr', 'CObs']:
 726                return NotImplemented
 727            else:
 728                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 729
 730    def __rsub__(self, y):
 731        return -1 * (self - y)
 732
 733    def __pos__(self):
 734        return self
 735
 736    def __neg__(self):
 737        return -1 * self
 738
 739    def __truediv__(self, y):
 740        if isinstance(y, Obs):
 741            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 742        else:
 743            if isinstance(y, np.ndarray):
 744                return np.array([self / o for o in y])
 745            elif y.__class__.__name__ in ['Corr', 'CObs']:
 746                return NotImplemented
 747            else:
 748                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 749
 750    def __rtruediv__(self, y):
 751        if isinstance(y, Obs):
 752            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 753        else:
 754            if isinstance(y, np.ndarray):
 755                return np.array([o / self for o in y])
 756            elif y.__class__.__name__ in ['Corr', 'CObs']:
 757                return NotImplemented
 758            else:
 759                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 760
 761    def __pow__(self, y):
 762        if isinstance(y, Obs):
 763            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 764        else:
 765            return derived_observable(lambda x: x[0] ** y, [self])
 766
 767    def __rpow__(self, y):
 768        if isinstance(y, Obs):
 769            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 770        else:
 771            return derived_observable(lambda x: y ** x[0], [self])
 772
 773    def __abs__(self):
 774        return derived_observable(lambda x: anp.abs(x[0]), [self])
 775
 776    # Overload numpy functions
 777    def sqrt(self):
 778        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 779
 780    def log(self):
 781        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 782
 783    def exp(self):
 784        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 785
 786    def sin(self):
 787        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 788
 789    def cos(self):
 790        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 791
 792    def tan(self):
 793        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 794
 795    def arcsin(self):
 796        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 797
 798    def arccos(self):
 799        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 800
 801    def arctan(self):
 802        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 803
 804    def sinh(self):
 805        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 806
 807    def cosh(self):
 808        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 809
 810    def tanh(self):
 811        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 812
 813    def arcsinh(self):
 814        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 815
 816    def arccosh(self):
 817        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 818
 819    def arctanh(self):
 820        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 821
 822
 823class CObs:
 824    """Class for a complex valued observable."""
 825    __slots__ = ['_real', '_imag', 'tag']
 826
 827    def __init__(self, real, imag=0.0):
 828        self._real = real
 829        self._imag = imag
 830        self.tag = None
 831
 832    @property
 833    def real(self):
 834        return self._real
 835
 836    @property
 837    def imag(self):
 838        return self._imag
 839
 840    def gamma_method(self, **kwargs):
 841        """Executes the gamma_method for the real and the imaginary part."""
 842        if isinstance(self.real, Obs):
 843            self.real.gamma_method(**kwargs)
 844        if isinstance(self.imag, Obs):
 845            self.imag.gamma_method(**kwargs)
 846
 847    def is_zero(self):
 848        """Checks whether both real and imaginary part are zero within machine precision."""
 849        return self.real == 0.0 and self.imag == 0.0
 850
 851    def conjugate(self):
 852        return CObs(self.real, -self.imag)
 853
 854    def __add__(self, other):
 855        if isinstance(other, np.ndarray):
 856            return other + self
 857        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 858            return CObs(self.real + other.real,
 859                        self.imag + other.imag)
 860        else:
 861            return CObs(self.real + other, self.imag)
 862
 863    def __radd__(self, y):
 864        return self + y
 865
 866    def __sub__(self, other):
 867        if isinstance(other, np.ndarray):
 868            return -1 * (other - self)
 869        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 870            return CObs(self.real - other.real, self.imag - other.imag)
 871        else:
 872            return CObs(self.real - other, self.imag)
 873
 874    def __rsub__(self, other):
 875        return -1 * (self - other)
 876
 877    def __mul__(self, other):
 878        if isinstance(other, np.ndarray):
 879            return other * self
 880        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 881            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 882                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 883                                               [self.real, other.real, self.imag, other.imag],
 884                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 885                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 886                                               [self.real, other.real, self.imag, other.imag],
 887                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 888            elif getattr(other, 'imag', 0) != 0:
 889                return CObs(self.real * other.real - self.imag * other.imag,
 890                            self.imag * other.real + self.real * other.imag)
 891            else:
 892                return CObs(self.real * other.real, self.imag * other.real)
 893        else:
 894            return CObs(self.real * other, self.imag * other)
 895
 896    def __rmul__(self, other):
 897        return self * other
 898
 899    def __truediv__(self, other):
 900        if isinstance(other, np.ndarray):
 901            return 1 / (other / self)
 902        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 903            r = other.real ** 2 + other.imag ** 2
 904            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 905        else:
 906            return CObs(self.real / other, self.imag / other)
 907
 908    def __rtruediv__(self, other):
 909        r = self.real ** 2 + self.imag ** 2
 910        if hasattr(other, 'real') and hasattr(other, 'imag'):
 911            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 912        else:
 913            return CObs(self.real * other / r, -self.imag * other / r)
 914
 915    def __abs__(self):
 916        return np.sqrt(self.real**2 + self.imag**2)
 917
 918    def __pos__(self):
 919        return self
 920
 921    def __neg__(self):
 922        return -1 * self
 923
 924    def __eq__(self, other):
 925        return self.real == other.real and self.imag == other.imag
 926
 927    def __str__(self):
 928        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 929
 930    def __repr__(self):
 931        return 'CObs[' + str(self) + ']'
 932
 933
 934def _expand_deltas(deltas, idx, shape):
 935    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 936       If idx is of type range, the deltas are not changed
 937
 938    Parameters
 939    ----------
 940    deltas : list
 941        List of fluctuations
 942    idx : list
 943        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 944    shape : int
 945        Number of configs in idx.
 946    """
 947    if isinstance(idx, range):
 948        return deltas
 949    else:
 950        ret = np.zeros(idx[-1] - idx[0] + 1)
 951        for i in range(shape):
 952            ret[idx[i] - idx[0]] = deltas[i]
 953        return ret
 954
 955
 956def _merge_idx(idl):
 957    """Returns the union of all lists in idl as sorted list
 958
 959    Parameters
 960    ----------
 961    idl : list
 962        List of lists or ranges.
 963    """
 964
 965    # Use groupby to efficiently check whether all elements of idl are identical
 966    try:
 967        g = groupby(idl)
 968        if next(g, True) and not next(g, False):
 969            return idl[0]
 970    except Exception:
 971        pass
 972
 973    if np.all([type(idx) is range for idx in idl]):
 974        if len(set([idx[0] for idx in idl])) == 1:
 975            idstart = min([idx.start for idx in idl])
 976            idstop = max([idx.stop for idx in idl])
 977            idstep = min([idx.step for idx in idl])
 978            return range(idstart, idstop, idstep)
 979
 980    return sorted(set().union(*idl))
 981
 982
 983def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
 984    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
 985       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
 986       common divisor of the step sizes is used as new step size.
 987
 988    Parameters
 989    ----------
 990    deltas : list
 991        List of fluctuations
 992    idx : list
 993        List or range of configs on which the deltas are defined.
 994        Has to be a subset of new_idx and has to be sorted in ascending order.
 995    shape : list
 996        Number of configs in idx.
 997    new_idx : list
 998        List of configs that defines the new range, has to be sorted in ascending order.
 999    """
1000
1001    if type(idx) is range and type(new_idx) is range:
1002        if idx == new_idx:
1003            return deltas
1004    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1005    for i in range(shape):
1006        ret[idx[i] - new_idx[0]] = deltas[i]
1007    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1008
1009
def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
    """Drop configurations whose fluctuation vanishes.

    A fluctuation counts as vanishing if its absolute value does not exceed
    eps times the mean of the absolute values of all deltas. If no
    configuration survives the filter, deltas and idx are returned unchanged.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or ranges of configs on which the deltas are defined.
    eps : float
        Prefactor that enters the filter criterion.

    Returns
    -------
    tuple
        The filtered deltas (as numpy array) and the matching list of configs.
    """
    threshold = eps * np.mean(np.fabs(deltas))
    kept = [pos for pos in range(len(deltas)) if abs(deltas[pos]) > threshold]
    if not kept:
        return deltas, idx
    return np.array([deltas[pos] for pos in kept]), [idx[pos] for pos in kept]
1037
1038
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the fluctuations and covobs gradients are first collected
        per entry of data and then contracted with the derivative via
        np.tensordot (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func returns a scalar, otherwise an object array of
        Obs with the shape of the return value of func.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    # (plain ints and floats are replaced by a cov_Obs with vanishing covariance)
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect one covariance matrix per covobs name and make sure that all
    # inputs sharing a name agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that carry samples, i.e. all names that do not belong to a covobs.
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # The flags of the result are set as soon as one of the inputs has them set.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Values of the inputs, arranged in the shape of data.
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returned an array, 0 if it returned a scalar.
    multi = int(isinstance(new_values, np.ndarray))

    # Evaluate func on the r_values of every sample name (falling back to the
    # value of an Obs that has no entry for the name) and merge the idl of
    # all inputs that are defined on that name.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Flag the name as merged if the inputs and the merged idl do not
            # all have the same length.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Derivative of func: supplied manually, computed numerically with
    # numdifftools or obtained from autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        # Defaults can be overwritten via kwargs of the same name.
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in for a missing covobs that provides a vanishing gradient.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        # Expanded deltas (d_extracted) and covobs gradients (g_extracted),
        # one array per entry of data, for every name.
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per entry of the return value of func.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Linear error propagation: accumulate derivative times
            # fluctuation (or covobs gradient) of every input.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                # Configs with vanishing fluctuation are only filtered for
                # names that are flagged as merged.
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The Obs is created from the sample names only; covobs names, covobs,
        # value and flags are attached afterwards.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # Unwrap the zero dimensional object array for scalar results.
    if multi == 0:
        final_result = final_result.item()

    return final_result
1217
1218
def _reduce_deltas(deltas, idx_old, idx_new):
    """Pick the deltas that belong to the configs of idx_new.

    Both idx_old and idx_new are assumed to be valid idl, i.e. sorted in
    ascending order. If both are equal ranges, deltas is returned unchanged.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.

    Raises
    ------
    Exception
        If deltas and idx_old differ in length or if a config of idx_new is
        not found in idx_old.
    """
    if len(deltas) != len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range and idx_old == idx_new:
        return deltas

    n_old = len(idx_old)
    reduced = np.zeros(len(idx_new))
    # Both lists are sorted, so the search for the next config can resume at
    # the position of the previous match.
    cursor = 0
    for i, cfg in enumerate(idx_new):
        while cursor < n_old and idx_old[cursor] != cfg:
            cursor += 1
        if cursor == n_old:
            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (cfg))
        reduced[i] = deltas[cursor]
    return np.array(reduced)
1254
1255
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.

    Returns
    -------
    list
        One reweighted Obs per entry of obs, flagged as reweighted.

    Raises
    ------
    Exception
        If an entry of obs contains covobs, lives on ensembles that weight
        does not know, or uses configs that are missing in weight.idl.
    """
    result = []
    for i, current in enumerate(obs):
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        # Weight restricted to the configs of the current observable.
        w_deltas = {}
        weighted_samples = []
        for name in sorted(current.names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            w_samples = w_deltas[name] + weight.r_values[name]
            o_samples = current.deltas[name] + current.r_values[name]
            weighted_samples.append(w_samples * o_samples)
        numerator = Obs(weighted_samples, sorted(current.names), idl=[current.idl[name] for name in sorted(current.names)])

        if kwargs.get('all_configs'):
            denominator = weight
        else:
            denominator = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(current.names)], sorted(current.names), idl=[current.idl[name] for name in sorted(current.names)])

        reweighted_obs = derived_observable(lambda x, **kwargs: x[0] / x[1], [numerator, denominator], **kwargs)
        reweighted_obs.reweighted = True
        reweighted_obs.is_merged = current.is_merged
        result.append(reweighted_obs)

    return result
1297
1298
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the configuration-wise product of the samples
        of obs_a and obs_b.

    Raises
    ------
    Exception
        If the ensembles, shapes or idl of the two observables differ or if
        one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # The messages are built as one string; passing the pieces as separate
        # arguments would make the exception print as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        # Product of the full samples (fluctuation plus replicum mean).
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
1341
1342
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation instead of the covariance is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Returns
    -------
    numpy.ndarray
        The covariance matrix, or the correlation matrix if correlation is True.

    Notes
    -----
    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    # The rank warning is only issued if none of the observables carries covobs.
    if max_samples <= length and sum(len(o.cov_names) for o in obs) == 0:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Only the upper triangle is computed, the matrix is symmetrised afterwards.
    upper = np.zeros((length, length))
    for i, o_i in enumerate(obs):
        for j in range(i, length):
            upper[i, j] = _covariance_element(o_i, obs[j])
    cov = upper + upper.T - np.diag(np.diag(upper))

    inv_std = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_std @ cov @ inv_std

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    # Rescale the correlation matrix with the errors of the observables.
    err_matrix = np.diag([o.dvalue for o in obs])
    cov = err_matrix @ corr @ err_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    return corr if correlation is True else cov
1405
1406
1407def _smooth_eigenvalues(corr, E):
1408    """Eigenvalue smoothing as described in hep-lat/9412087
1409
1410    corr : np.ndarray
1411        correlation matrix
1412    E : integer
1413        Number of eigenvalues to be left substantially unchanged
1414    """
1415    if not (2 < E < corr.shape[0] - 1):
1416        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1417    vals, vec = np.linalg.eigh(corr)
1418    lambda_min = np.mean(vals[:-E])
1419    vals[vals < lambda_min] = lambda_min
1420    vals /= np.mean(vals)
1421    return vec @ np.diag(vals) @ vec.T
1422
1423
1424def _covariance_element(obs1, obs2):
1425    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1426
1427    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1428        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
1429        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
1430        return np.sum(deltas1 * deltas2)
1431
1432    if set(obs1.names).isdisjoint(set(obs2.names)):
1433        return 0.0
1434
1435    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1436        raise Exception('The gamma method has to be applied to both Obs first.')
1437
1438    dvalue = 0.0
1439
1440    for e_name in obs1.mc_names:
1441
1442        if e_name not in obs2.mc_names:
1443            continue
1444
1445        idl_d = {}
1446        for r_name in obs1.e_content[e_name]:
1447            if r_name not in obs2.e_content[e_name]:
1448                continue
1449            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])
1450
1451        gamma = 0.0
1452
1453        for r_name in obs1.e_content[e_name]:
1454            if r_name not in obs2.e_content[e_name]:
1455                continue
1456            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1457
1458        if gamma == 0.0:
1459            continue
1460
1461        gamma_div = 0.0
1462        e_N = 0
1463        for r_name in obs1.e_content[e_name]:
1464            if r_name not in obs2.e_content[e_name]:
1465                continue
1466            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1467            e_N += len(idl_d[r_name])
1468        gamma /= max(gamma_div, 1.0)
1469
1470        # Bias correction hep-lat/0306017 eq. (49)
1471        dvalue += (1 + 1 / e_N) * gamma / e_N
1472
1473    for e_name in obs1.cov_names:
1474
1475        if e_name not in obs2.cov_names:
1476            continue
1477
1478        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1479
1480    return dvalue
1481
1482
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        handed on unchanged as the idl argument of the Obs constructor.
    """
    n_jack = len(jacks) - 1
    # Linear map applied to the jackknife samples to obtain the samples of the Obs
    projector = np.ones((n_jack, n_jack)) - (n_jack - 1) * np.identity(n_jack)
    samples = jacks[1:] @ projector
    sample_mean = np.mean(samples)
    result = Obs([samples - sample_mean], [name], idl=idl, means=[sample_mean])
    # The central value is taken from the zeroth entry, not from the samples
    result._value = jacks[0]
    return result
1501
1502
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Raises
    ------
    Exception
        if a replicum appears more than once or if any Obs contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [rep for obs in list_of_obs for rep in obs.names]
    if len(set(replist)) != len(replist):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    for obs in list_of_obs:
        if len(obs.cov_names):
            raise Exception('Not possible to merge data that contains covobs!')

    # Rebuild the full samples (fluctuation + replica mean) for every replicum
    samples = {}
    idl_by_name = {}
    for obs in list_of_obs:
        for key in set(obs.deltas) | set(obs.r_values):
            samples[key] = obs.deltas.get(key, 0) + obs.r_values.get(key, 0)
        for key in set(obs.deltas):
            idl_by_name[key] = obs.idl.get(key, 0)

    names = sorted(samples.keys())
    merged = Obs([samples[name] for name in names], names, idl=[idl_by_name[name] for name in names])
    merged.is_merged = {name: np.any([src.is_merged.get(name, False) for src in list_of_obs]) for name in merged.names}
    merged.reweighted = np.max([src.reweighted for src in list_of_obs])
    return merged
1532
1533
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        a single Obs if exactly one mean value was given, otherwise a list
        with one Obs per mean value.

    Raises
    ------
    Exception
        if the number of mean values differs from the size N of the Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # The required number of means is a property of the Covobs. The N of the
    # Obs itself is 0 here because the Obs is built without any samples.
    n_required = ol[0].covobs[name].N
    if n_required != len(means):
        raise Exception('You have to provide %d mean values!' % (n_required))
    if len(ol) == 1:
        return ol[0]
    return ol
#   class Obs:
View Source
 13class Obs:
 14    """Class for a general observable.
 15
 16    Instances of Obs are the basic objects of a pyerrors error analysis.
 17    They are initialized with a list which contains arrays of samples for
 18    different ensembles/replica and another list of same length which contains
 19    the names of the ensembles/replica. Mathematical operations can be
 20    performed on instances. The result is another instance of Obs. The error of
 21    an instance can be computed with the gamma_method. Also contains additional
 22    methods for output and visualization of the error calculation.
 23
 24    Attributes
 25    ----------
 26    S_global : float
 27        Standard value for S (default 2.0)
 28    S_dict : dict
 29        Dictionary for S values. If an entry for a given ensemble
 30        exists this overwrites the standard value for that ensemble.
 31    tau_exp_global : float
 32        Standard value for tau_exp (default 0.0)
 33    tau_exp_dict : dict
 34        Dictionary for tau_exp values. If an entry for a given ensemble exists
 35        this overwrites the standard value for that ensemble.
 36    N_sigma_global : float
 37        Standard value for N_sigma (default 1.0)
 38    N_sigma_dict : dict
 39        Dictionary for N_sigma values. If an entry for a given ensemble exists
 40        this overwrites the standard value for that ensemble.
 41    """
 42    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 43                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 44                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 45                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 46                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 47
 48    S_global = 2.0
 49    S_dict = {}
 50    tau_exp_global = 0.0
 51    tau_exp_dict = {}
 52    N_sigma_global = 1.0
 53    N_sigma_dict = {}
 54    filter_eps = 1e-10
 55
 56    def __init__(self, samples, names, idl=None, **kwargs):
 57        """ Initialize Obs object.
 58
 59        Parameters
 60        ----------
 61        samples : list
 62            list of numpy arrays containing the Monte Carlo samples
 63        names : list
 64            list of strings labeling the individual samples
 65        idl : list, optional
 66            list of ranges or lists on which the samples are defined
 67        """
 68
 69        if kwargs.get("means") is None and len(samples):
 70            if len(samples) != len(names):
 71                raise Exception('Length of samples and names incompatible.')
 72            if idl is not None:
 73                if len(idl) != len(names):
 74                    raise Exception('Length of idl incompatible with samples and names.')
 75            name_length = len(names)
 76            if name_length > 1:
 77                if name_length != len(set(names)):
 78                    raise Exception('names are not unique.')
 79                if not all(isinstance(x, str) for x in names):
 80                    raise TypeError('All names have to be strings.')
 81            else:
 82                if not isinstance(names[0], str):
 83                    raise TypeError('All names have to be strings.')
 84            if min(len(x) for x in samples) <= 4:
 85                raise Exception('Samples have to have at least 5 entries.')
 86
 87        self.names = sorted(names)
 88        self.shape = {}
 89        self.r_values = {}
 90        self.deltas = {}
 91        self._covobs = {}
 92
 93        self._value = 0
 94        self.N = 0
 95        self.is_merged = {}
 96        self.idl = {}
 97        if idl is not None:
 98            for name, idx in sorted(zip(names, idl)):
 99                if isinstance(idx, range):
100                    self.idl[name] = idx
101                elif isinstance(idx, (list, np.ndarray)):
102                    dc = np.unique(np.diff(idx))
103                    if np.any(dc < 0):
104                        raise Exception("Unsorted idx for idl[%s]" % (name))
105                    if len(dc) == 1:
106                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
107                    else:
108                        self.idl[name] = list(idx)
109                else:
110                    raise Exception('incompatible type for idl[%s].' % (name))
111        else:
112            for name, sample in sorted(zip(names, samples)):
113                self.idl[name] = range(1, len(sample) + 1)
114
115        if kwargs.get("means") is not None:
116            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
117                self.shape[name] = len(self.idl[name])
118                self.N += self.shape[name]
119                self.r_values[name] = mean
120                self.deltas[name] = sample
121        else:
122            for name, sample in sorted(zip(names, samples)):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                if len(sample) != self.shape[name]:
126                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
127                self.r_values[name] = np.mean(sample)
128                self.deltas[name] = sample - self.r_values[name]
129                self._value += self.shape[name] * self.r_values[name]
130            self._value /= self.N
131
132        self._dvalue = 0.0
133        self.ddvalue = 0.0
134        self.reweighted = False
135
136        self.tag = None
137
138    @property
139    def value(self):
140        return self._value
141
142    @property
143    def dvalue(self):
144        return self._dvalue
145
146    @property
147    def e_names(self):
148        return sorted(set([o.split('|')[0] for o in self.names]))
149
150    @property
151    def cov_names(self):
152        return sorted(set([o for o in self.covobs.keys()]))
153
154    @property
155    def mc_names(self):
156        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
157
158    @property
159    def e_content(self):
160        res = {}
161        for e, e_name in enumerate(self.e_names):
162            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
163            if e_name in self.names:
164                res[e_name].append(e_name)
165        return res
166
167    @property
168    def covobs(self):
169        return self._covobs
170
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: the total error in `_dvalue`
        (accessible via `dvalue`), its error in `ddvalue` and the per ensemble
        quantities in the `e_*` dictionaries.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            if S, tau_exp or N_sigma is negative, or if tau_exp > 0 and the
            summation window is too short for the tau_exp analysis.
        TypeError
            if S, tau_exp or N_sigma is given but is neither int nor float.
        """

        # Reset all results of a previous run
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False switches the fft off
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble: an explicit keyword
            # argument applies to all ensembles, otherwise the class level
            # <kwarg_name>_dict entry or the <kwarg_name>_global value is used.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum in units of the configuration index
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Upper bound for the summation window: half of the longest replicum
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same sum evaluated for arrays of ones,
            # bounded from below by 1 to avoid a division by zero.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Normalized autocorrelation function and running sum for tau_int
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Fill self.e_drho[e_name][i], the error of rho at lag i
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Stop where rho is compatible with zero within N_sigma errors
                    # or at the end of the available range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int = 0.5 and window size 0
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        # The window is the first n with negative g_w, at the
                        # latest the last available lag.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Accumulate squared errors, the square roots are taken at the end
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their squared error, their error of the error is set to 0
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
332
333    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
334        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
335           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
336
337        Parameters
338        ----------
339        deltas : list
340            List of fluctuations
341        idx : list
342            List or range of configurations on which the deltas are defined.
343        shape : int
344            Number of configurations in idx.
345        w_max : int
346            Upper bound for the summation window.
347        fft : bool
348            determines whether the fft algorithm is used for the computation
349            of the autocorrelation function.
350        """
351        gamma = np.zeros(w_max)
352        deltas = _expand_deltas(deltas, idx, shape)
353        new_shape = len(deltas)
354        if fft:
355            max_gamma = min(new_shape, w_max)
356            # The padding for the fft has to be even
357            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
358            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
359        else:
360            for n in range(w_max):
361                if new_shape - n >= 0:
362                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
363
364        return gamma
365
366    def details(self, ens_content=True):
367        """Output detailed properties of the Obs.
368
369        Parameters
370        ----------
371        ens_content : bool
372            print details about the ensembles and replica if true.
373        """
374        if self.tag is not None:
375            print("Description:", self.tag)
376        if not hasattr(self, 'e_dvalue'):
377            print('Result\t %3.8e' % (self.value))
378        else:
379            if self.value == 0.0:
380                percentage = np.nan
381            else:
382                percentage = np.abs(self._dvalue / self.value) * 100
383            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
384            if len(self.e_names) > 1:
385                print(' Ensemble errors:')
386            for e_name in self.mc_names:
387                if len(self.e_names) > 1:
388                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
389                if self.tau_exp[e_name] > 0:
390                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
391                else:
392                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
393            for e_name in self.cov_names:
394                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
395        if ens_content is True:
396            if len(self.e_names) == 1:
397                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
398            else:
399                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
400            my_string_list = []
401            for key, value in sorted(self.e_content.items()):
402                if key not in self.covobs:
403                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
404                    if len(value) == 1:
405                        my_string += f': {self.shape[value[0]]} configurations'
406                        if isinstance(self.idl[value[0]], range):
407                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
408                        else:
409                            my_string += ' (irregular range)'
410                    else:
411                        sublist = []
412                        for v in value:
413                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
414                            my_substring += f': {self.shape[v]} configurations'
415                            if isinstance(self.idl[v], range):
416                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
417                            else:
418                                my_substring += ' (irregular range)'
419                            sublist.append(my_substring)
420
421                        my_string += '\n' + '\n'.join(sublist)
422                else:
423                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
424                my_string_list.append(my_string)
425            print('\n'.join(my_string_list))
426
427    def is_zero_within_error(self, sigma=1):
428        """Checks whether the observable is zero within 'sigma' standard errors.
429
430        Parameters
431        ----------
432        sigma : int
433            Number of standard errors used for the check.
434
435        Works only properly when the gamma method was run.
436        """
437        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
438
439    def is_zero(self, atol=1e-10):
440        """Checks whether the observable is zero within a given tolerance.
441
442        Parameters
443        ----------
444        atol : float
445            Absolute tolerance (for details see numpy documentation).
446        """
447        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
448
    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        One figure is created per Monte Carlo ensemble.

        Parameters
        ----------
        save : str
            if given, the figure of every ensemble is saved to a file named
            save + '_' + index of the ensemble.

        Raises
        ------
        Exception
            if the gamma method has not been run before.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                # Draw the tail which was attached beyond the summation window
                # (dashed line) and the resulting tau_int with its error.
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            # Running tau_int as a function of the window, the vertical line marks the chosen window
            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))
488
    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        One figure is created per Monte Carlo ensemble.

        Parameters
        ----------
        save : str
            if given, the figure of every ensemble is saved to a file named
            save + '_' + index of the ensemble.

        Raises
        ------
        Exception
            if the gamma method has not been run before.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            # The vertical line marks the chosen summation window
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                # Straight line from rho right after the window down to zero at a distance of 2 * tau_exp
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))
519
520    def plot_rep_dist(self):
521        """Plot replica distribution for each ensemble with more than one replicum."""
522        if not hasattr(self, 'e_dvalue'):
523            raise Exception('Run the gamma method first.')
524        for e, e_name in enumerate(self.mc_names):
525            if len(self.e_content[e_name]) == 1:
526                print('No replica distribution for a single replicum (', e_name, ')')
527                continue
528            r_length = []
529            sub_r_mean = 0
530            for r, r_name in enumerate(self.e_content[e_name]):
531                r_length.append(len(self.deltas[r_name]))
532                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
533            e_N = np.sum(r_length)
534            sub_r_mean /= e_N
535            arr = np.zeros(len(self.e_content[e_name]))
536            for r, r_name in enumerate(self.e_content[e_name]):
537                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
538            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
539            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
540            plt.draw()
541
542    def plot_history(self, expand=True):
543        """Plot derived Monte Carlo history for each ensemble
544
545        Parameters
546        ----------
547        expand : bool
548            show expanded history for irregular Monte Carlo chains (default: True).
549        """
550        for e, e_name in enumerate(self.mc_names):
551            plt.figure()
552            r_length = []
553            tmp = []
554            tmp_expanded = []
555            for r, r_name in enumerate(self.e_content[e_name]):
556                tmp.append(self.deltas[r_name] + self.r_values[r_name])
557                if expand:
558                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
559                    r_length.append(len(tmp_expanded[-1]))
560                else:
561                    r_length.append(len(tmp[-1]))
562            e_N = np.sum(r_length)
563            x = np.arange(e_N)
564            y_test = np.concatenate(tmp, axis=0)
565            if expand:
566                y = np.concatenate(tmp_expanded, axis=0)
567            else:
568                y = y_test
569            plt.errorbar(x, y, fmt='.', markersize=3)
570            plt.xlim(-0.5, e_N - 0.5)
571            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
572            plt.draw()
573
574    def plot_piechart(self, save=None):
575        """Plot piechart which shows the fractional contribution of each
576        ensemble to the error and returns a dictionary containing the fractions.
577
578        Parameters
579        ----------
580        save : str
581            saves the figure to a file named 'save' if.
582        """
583        if not hasattr(self, 'e_dvalue'):
584            raise Exception('Run the gamma method first.')
585        if np.isclose(0.0, self._dvalue, atol=1e-15):
586            raise Exception('Error is 0.0')
587        labels = self.e_names
588        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
589        fig1, ax1 = plt.subplots()
590        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
591        ax1.axis('equal')
592        plt.draw()
593        if save:
594            fig1.savefig(save)
595
596        return dict(zip(self.e_names, sizes))
597
598    def dump(self, filename, datatype="json.gz", description="", **kwargs):
599        """Dump the Obs to a file 'name' of chosen format.
600
601        Parameters
602        ----------
603        filename : str
604            name of the file to be saved.
605        datatype : str
606            Format of the exported file. Supported formats include
607            "json.gz" and "pickle"
608        description : str
609            Description for output file, only relevant for json.gz format.
610        path : str
611            specifies a custom path for the file (default '.')
612        """
613        if 'path' in kwargs:
614            file_name = kwargs.get('path') + '/' + filename
615        else:
616            file_name = filename
617
618        if datatype == "json.gz":
619            from .input.json import dump_to_json
620            dump_to_json([self], file_name, description=description)
621        elif datatype == "pickle":
622            with open(file_name + '.p', 'wb') as fb:
623                pickle.dump(self, fb)
624        else:
625            raise Exception("Unknown datatype " + str(datatype))
626
627    def export_jackknife(self):
628        """Export jackknife samples from the Obs
629
630        Returns
631        -------
632        numpy.ndarray
633            Returns a numpy array of length N + 1 where N is the number of samples
634            for the given ensemble and replicum. The zeroth entry of the array contains
635            the mean value of the Obs, entries 1 to N contain the N jackknife samples
636            derived from the Obs. The current implementation only works for observables
637            defined on exactly one ensemble and replicum. The derived jackknife samples
638            should agree with samples from a full jackknife analysis up to O(1/N).
639        """
640
641        if len(self.names) != 1:
642            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
643
644        name = self.names[0]
645        full_data = self.deltas[name] + self.r_values[name]
646        n = full_data.size
647        mean = self.value
648        tmp_jacks = np.zeros(n + 1)
649        tmp_jacks[0] = mean
650        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
651        return tmp_jacks
652
653    def __float__(self):
654        return float(self.value)
655
656    def __repr__(self):
657        return 'Obs[' + str(self) + ']'
658
659    def __str__(self):
660        if self._dvalue == 0.0:
661            return str(self.value)
662        fexp = np.floor(np.log10(self._dvalue))
663        if fexp < 0.0:
664            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
665        elif fexp == 0.0:
666            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
667        else:
668            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
669
670    # Overload comparisons
671    def __lt__(self, other):
672        return self.value < other
673
674    def __le__(self, other):
675        return self.value <= other
676
677    def __gt__(self, other):
678        return self.value > other
679
680    def __ge__(self, other):
681        return self.value >= other
682
683    def __eq__(self, other):
684        return (self - other).is_zero()
685
686    def __ne__(self, other):
687        return not (self - other).is_zero()
688
689    # Overload math operations
690    def __add__(self, y):
691        if isinstance(y, Obs):
692            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
693        else:
694            if isinstance(y, np.ndarray):
695                return np.array([self + o for o in y])
696            elif y.__class__.__name__ in ['Corr', 'CObs']:
697                return NotImplemented
698            else:
699                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
700
701    def __radd__(self, y):
702        return self + y
703
704    def __mul__(self, y):
705        if isinstance(y, Obs):
706            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
707        else:
708            if isinstance(y, np.ndarray):
709                return np.array([self * o for o in y])
710            elif isinstance(y, complex):
711                return CObs(self * y.real, self * y.imag)
712            elif y.__class__.__name__ in ['Corr', 'CObs']:
713                return NotImplemented
714            else:
715                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
716
717    def __rmul__(self, y):
718        return self * y
719
720    def __sub__(self, y):
721        if isinstance(y, Obs):
722            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
723        else:
724            if isinstance(y, np.ndarray):
725                return np.array([self - o for o in y])
726            elif y.__class__.__name__ in ['Corr', 'CObs']:
727                return NotImplemented
728            else:
729                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
730
731    def __rsub__(self, y):
732        return -1 * (self - y)
733
734    def __pos__(self):
735        return self
736
737    def __neg__(self):
738        return -1 * self
739
740    def __truediv__(self, y):
741        if isinstance(y, Obs):
742            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
743        else:
744            if isinstance(y, np.ndarray):
745                return np.array([self / o for o in y])
746            elif y.__class__.__name__ in ['Corr', 'CObs']:
747                return NotImplemented
748            else:
749                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
750
751    def __rtruediv__(self, y):
752        if isinstance(y, Obs):
753            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
754        else:
755            if isinstance(y, np.ndarray):
756                return np.array([o / self for o in y])
757            elif y.__class__.__name__ in ['Corr', 'CObs']:
758                return NotImplemented
759            else:
760                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
761
762    def __pow__(self, y):
763        if isinstance(y, Obs):
764            return derived_observable(lambda x: x[0] ** x[1], [self, y])
765        else:
766            return derived_observable(lambda x: x[0] ** y, [self])
767
768    def __rpow__(self, y):
769        if isinstance(y, Obs):
770            return derived_observable(lambda x: x[0] ** x[1], [y, self])
771        else:
772            return derived_observable(lambda x: y ** x[0], [self])
773
774    def __abs__(self):
775        return derived_observable(lambda x: anp.abs(x[0]), [self])
776
777    # Overload numpy functions
778    def sqrt(self):
779        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
780
781    def log(self):
782        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
783
784    def exp(self):
785        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
786
787    def sin(self):
788        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
789
790    def cos(self):
791        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
792
793    def tan(self):
794        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
795
796    def arcsin(self):
797        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
798
799    def arccos(self):
800        return derived_observable(lambda x: anp.arccos(x[0]), [self])
801
802    def arctan(self):
803        return derived_observable(lambda x: anp.arctan(x[0]), [self])
804
805    def sinh(self):
806        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
807
808    def cosh(self):
809        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
810
811    def tanh(self):
812        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
813
814    def arcsinh(self):
815        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
816
817    def arccosh(self):
818        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
819
820    def arctanh(self):
821        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
#   Obs(samples, names, idl=None, **kwargs)
View Source
 56    def __init__(self, samples, names, idl=None, **kwargs):
 57        """ Initialize Obs object.
 58
 59        Parameters
 60        ----------
 61        samples : list
 62            list of numpy arrays containing the Monte Carlo samples
 63        names : list
 64            list of strings labeling the individual samples
 65        idl : list, optional
 66            list of ranges or lists on which the samples are defined
 67        """
 68
 69        if kwargs.get("means") is None and len(samples):
 70            if len(samples) != len(names):
 71                raise Exception('Length of samples and names incompatible.')
 72            if idl is not None:
 73                if len(idl) != len(names):
 74                    raise Exception('Length of idl incompatible with samples and names.')
 75            name_length = len(names)
 76            if name_length > 1:
 77                if name_length != len(set(names)):
 78                    raise Exception('names are not unique.')
 79                if not all(isinstance(x, str) for x in names):
 80                    raise TypeError('All names have to be strings.')
 81            else:
 82                if not isinstance(names[0], str):
 83                    raise TypeError('All names have to be strings.')
 84            if min(len(x) for x in samples) <= 4:
 85                raise Exception('Samples have to have at least 5 entries.')
 86
 87        self.names = sorted(names)
 88        self.shape = {}
 89        self.r_values = {}
 90        self.deltas = {}
 91        self._covobs = {}
 92
 93        self._value = 0
 94        self.N = 0
 95        self.is_merged = {}
 96        self.idl = {}
 97        if idl is not None:
 98            for name, idx in sorted(zip(names, idl)):
 99                if isinstance(idx, range):
100                    self.idl[name] = idx
101                elif isinstance(idx, (list, np.ndarray)):
102                    dc = np.unique(np.diff(idx))
103                    if np.any(dc < 0):
104                        raise Exception("Unsorted idx for idl[%s]" % (name))
105                    if len(dc) == 1:
106                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
107                    else:
108                        self.idl[name] = list(idx)
109                else:
110                    raise Exception('incompatible type for idl[%s].' % (name))
111        else:
112            for name, sample in sorted(zip(names, samples)):
113                self.idl[name] = range(1, len(sample) + 1)
114
115        if kwargs.get("means") is not None:
116            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
117                self.shape[name] = len(self.idl[name])
118                self.N += self.shape[name]
119                self.r_values[name] = mean
120                self.deltas[name] = sample
121        else:
122            for name, sample in sorted(zip(names, samples)):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                if len(sample) != self.shape[name]:
126                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
127                self.r_values[name] = np.mean(sample)
128                self.deltas[name] = sample - self.r_values[name]
129                self._value += self.shape[name] * self.r_values[name]
130            self._value /= self.N
131
132        self._dvalue = 0.0
133        self.ddvalue = 0.0
134        self.reweighted = False
135
136        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
#   S_global = 2.0
#   S_dict = {}
#   tau_exp_global = 0.0
#   tau_exp_dict = {}
#   N_sigma_global = 1.0
#   N_sigma_dict = {}
#   filter_eps = 1e-10
#   names
#   shape
#   r_values
#   deltas
#   is_merged
#   idl
#   ddvalue
#   reweighted
#   tag
#   value
#   dvalue
#   e_names
#   cov_names
#   mc_names
#   e_content
#   covobs
#   def gamma_method(self, **kwargs):
View Source
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: per-ensemble quantities in the
        dictionaries `e_dvalue`, `e_ddvalue`, `e_tauint`, `e_dtauint`,
        `e_windowsize`, `e_n_tauint`, `e_n_dtauint`, `e_rho` and `e_drho`,
        and the combined error and its error in `_dvalue` and `ddvalue`.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            if S, tau_exp or N_sigma is negative, or if the tau_exp analysis
            is requested with too few samples.
        TypeError
            if S, tau_exp or N_sigma is passed but is neither int nor float.
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # fft is used unless it is explicitly disabled with fft=False
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble: an explicit keyword
            # argument applies to all ensembles, otherwise the class level
            # <kwarg_name>_dict entry or the <kwarg_name>_global value is used.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum: number of entries for a range,
            # otherwise the span between first and last index
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Largest window considered: half of the longest replicum
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the contributions of all replica of this ensemble
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same quantity evaluated for an array of ones;
            # entries below 1 are set to 1 before dividing
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: store trivial results and skip this ensemble.
                # Note that e_n_tauint and e_n_dtauint get no entry in this case.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Error of rho at separation i, stored in self.e_drho for the current ensemble
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Stop once rho is compatible with zero within N_sigma errors
                    # or the end of the scanned range is reached
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tauint fixed to 0.5 and window 0
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        # The window is the first n at which g_w turns negative;
                        # the last possible n is used if that never happens
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Ensemble errors are accumulated in quadrature
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Contributions of the covobs; their error of the error is set to 0
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
#   def details(self, ens_content=True):
View Source
366    def details(self, ens_content=True):
367        """Output detailed properties of the Obs.
368
369        Parameters
370        ----------
371        ens_content : bool
372            print details about the ensembles and replica if true.
373        """
374        if self.tag is not None:
375            print("Description:", self.tag)
376        if not hasattr(self, 'e_dvalue'):
377            print('Result\t %3.8e' % (self.value))
378        else:
379            if self.value == 0.0:
380                percentage = np.nan
381            else:
382                percentage = np.abs(self._dvalue / self.value) * 100
383            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
384            if len(self.e_names) > 1:
385                print(' Ensemble errors:')
386            for e_name in self.mc_names:
387                if len(self.e_names) > 1:
388                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
389                if self.tau_exp[e_name] > 0:
390                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
391                else:
392                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
393            for e_name in self.cov_names:
394                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
395        if ens_content is True:
396            if len(self.e_names) == 1:
397                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
398            else:
399                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
400            my_string_list = []
401            for key, value in sorted(self.e_content.items()):
402                if key not in self.covobs:
403                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
404                    if len(value) == 1:
405                        my_string += f': {self.shape[value[0]]} configurations'
406                        if isinstance(self.idl[value[0]], range):
407                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
408                        else:
409                            my_string += ' (irregular range)'
410                    else:
411                        sublist = []
412                        for v in value:
413                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
414                            my_substring += f': {self.shape[v]} configurations'
415                            if isinstance(self.idl[v], range):
416                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
417                            else:
418                                my_substring += ' (irregular range)'
419                            sublist.append(my_substring)
420
421                        my_string += '\n' + '\n'.join(sublist)
422                else:
423                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
424                my_string_list.append(my_string)
425            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
#   def is_zero_within_error(self, sigma=1):
View Source
427    def is_zero_within_error(self, sigma=1):
428        """Checks whether the observable is zero within 'sigma' standard errors.
429
430        Parameters
431        ----------
432        sigma : int
433            Number of standard errors used for the check.
434
435        Works only properly when the gamma method was run.
436        """
437        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Note: this check only works properly after the gamma method has been run.
#   def is_zero(self, atol=1e-10):
View Source
439    def is_zero(self, atol=1e-10):
440        """Checks whether the observable is zero within a given tolerance.
441
442        Parameters
443        ----------
444        atol : float
445            Absolute tolerance (for details see numpy documentation).
446        """
447        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
#   def plot_tauint(self, save=None):
View Source
def plot_tauint(self, save=None):
    """Plot integrated autocorrelation time for each ensemble.

    One figure is created per entry of self.mc_names, showing
    e_n_tauint with error bars e_n_dtauint as a function of the window W
    and a vertical line at the stored window size.

    Parameters
    ----------
    save : str
        if given, every figure is saved to a file named
        save + '_' + str(e), where e is the index of the ensemble in
        self.mc_names.

    Raises
    ------
    Exception
        if the attribute 'e_dvalue' is not set (the message asks to run
        the gamma method first).
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')

    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel(r'$W$')
        plt.ylabel(r'$\tau_\mathrm{int}$')
        length = int(len(self.e_n_tauint[e_name]))
        if self.tau_exp[e_name] > 0:
            # tau_exp branch: draw a dashed continuation starting right after the
            # window, built from rho at windowsize + 1 on top of tauint at the window.
            base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
            x_help = np.arange(2 * self.tau_exp[e_name])
            y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
            x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
            plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
            # Single marker with the final tauint and its error at the end of the continuation.
            plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                         yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
        else:
            label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
            # Show at least W <= 10, otherwise up to about twice the window size.
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

        # Only the part of the curve inside the visible x range is plotted.
        plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
        plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
        plt.legend()
        plt.xlim(-0.5, xmax)
        ylim = plt.ylim()
        # The y axis starts at zero and extends at least up to 1.0.
        plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): if given, each figure is saved to a file named save + '_' + the index of the ensemble.
#   def plot_rho(self, save=None):
View Source
def plot_rho(self, save=None):
    """Plot the normalized autocorrelation function for each ensemble.

    One figure is created per entry of self.mc_names, showing e_rho with
    error bars e_drho and a vertical line at the stored window size.

    Parameters
    ----------
    save : str
        if given, every figure is saved to a file named
        save + '_' + str(e), where e is the index of the ensemble in
        self.mc_names.

    Raises
    ------
    Exception
        if the attribute 'e_dvalue' is not set (the message asks to run
        the gamma method first).
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel('W')
        plt.ylabel('rho')
        # e_rho is truncated to the number of available error estimates.
        length = int(len(self.e_drho[e_name]))
        plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
        plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
        if self.tau_exp[e_name] > 0:
            # Straight line from rho at windowsize + 1 down to zero over 2 * tau_exp steps.
            plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                     [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
        else:
            # Show at least W <= 10, otherwise up to about twice the window size.
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
            plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
        # Dashed zero line across the visible range.
        plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
        plt.xlim(-0.5, xmax)
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))

Plot the normalized autocorrelation function for each ensemble.

Parameters
  • save (str): if given, each figure is saved to a file named save + '_' + the index of the ensemble.
#   def plot_rep_dist(self):
View Source
def plot_rep_dist(self):
    """Plot replica distribution for each ensemble with more than one replicum.

    For every ensemble in self.mc_names that consists of more than one
    replicum, a histogram of the deviations of the replica means from the
    weighted ensemble mean is drawn in its own figure. The deviations are
    divided by e_dvalue * sqrt(e_N / shape - 1) of the respective replicum.
    Ensembles with a single replicum are skipped with a printed message.

    Raises
    ------
    Exception
        if the attribute 'e_dvalue' is not set (the message asks to run
        the gamma method first).
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e_name in self.mc_names:
        replica = self.e_content[e_name]
        if len(replica) == 1:
            print('No replica distribution for a single replicum (', e_name, ')')
            continue
        r_length = []
        sub_r_mean = 0
        for r_name in replica:
            r_length.append(len(self.deltas[r_name]))
            sub_r_mean += self.shape[r_name] * self.r_values[r_name]
        e_N = np.sum(r_length)
        sub_r_mean /= e_N
        arr = np.zeros(len(replica))
        for r, r_name in enumerate(replica):
            arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
        # Open a fresh figure per ensemble (as the other plot methods do) so that
        # histograms of different ensembles are not drawn on top of each other.
        plt.figure()
        plt.hist(arr, rwidth=0.8, bins=len(replica))
        plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
        plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

#   def plot_history(self, expand=True):
View Source
def plot_history(self, expand=True):
    """Plot derived Monte Carlo history for each ensemble.

    One figure is created per entry of self.mc_names. The title reports
    skewness and kurtosis (with the p-values of the corresponding tests)
    of the non-expanded history.

    Parameters
    ----------
    expand : bool
        show expanded history for irregular Monte Carlo chains (default: True).
    """
    for ens_name in self.mc_names:
        plt.figure()
        raw_parts = []
        expanded_parts = []
        for rep_name in self.e_content[ens_name]:
            rep_mean = self.r_values[rep_name]
            raw_parts.append(self.deltas[rep_name] + rep_mean)
            if expand:
                filled = _expand_deltas(self.deltas[rep_name], list(self.idl[rep_name]), self.shape[rep_name])
                expanded_parts.append(filled + rep_mean)

        # The x axis covers whichever version of the history is displayed.
        shown_parts = expanded_parts if expand else raw_parts
        n_points = np.sum([len(part) for part in shown_parts])
        positions = np.arange(n_points)

        # Statistics in the title are always computed from the raw history.
        raw_history = np.concatenate(raw_parts, axis=0)
        if expand:
            history = np.concatenate(expanded_parts, axis=0)
        else:
            history = raw_history

        plt.errorbar(positions, history, fmt='.', markersize=3)
        plt.xlim(-0.5, n_points - 0.5)
        plt.title(ens_name + f'\nskew: {skew(raw_history):.3f} (p={skewtest(raw_history).pvalue:.3f}), kurtosis: {kurtosis(raw_history):.3f} (p={kurtosistest(raw_history).pvalue:.3f})')
        plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
#   def plot_piechart(self, save=None):
View Source
def plot_piechart(self, save=None):
    """Plot piechart which shows the fractional contribution of each
    ensemble to the error and returns a dictionary containing the fractions.

    The fraction of an ensemble is its squared error e_dvalue divided by
    the squared total error of the observable.

    Parameters
    ----------
    save : str
        if given, the figure is saved to a file named 'save'.

    Returns
    -------
    dict
        Maps every name in self.e_names to its fraction of the squared error.

    Raises
    ------
    Exception
        if the attribute 'e_dvalue' is not set (the message asks to run
        the gamma method first) or if the total error is zero.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    if np.isclose(0.0, self._dvalue, atol=1e-15):
        raise Exception('Error is 0.0')
    labels = self.e_names
    # Convert explicitly: dividing a plain list only works by accident when
    # the divisor is a numpy scalar and fails for a built-in float.
    sizes = np.array([self.e_dvalue[name] ** 2 for name in labels]) / self._dvalue ** 2
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
    ax1.axis('equal')
    plt.draw()
    if save:
        fig1.savefig(save)

    return dict(zip(labels, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): if given, the figure is saved to a file named 'save'.
#   def dump(self, filename, datatype='json.gz', description='', **kwargs):
View Source
def dump(self, filename, datatype="json.gz", description="", **kwargs):
    """Dump the Obs to a file 'filename' of chosen format.

    Parameters
    ----------
    filename : str
        name of the file to be saved.
    datatype : str
        Format of the exported file. Supported formats include
        "json.gz" and "pickle". For "pickle" the suffix '.p' is appended
        to the file name.
    description : str
        Description for output file, only relevant for json.gz format.
    path : str
        specifies a custom path for the file (default '.')

    Raises
    ------
    Exception
        if datatype is neither "json.gz" nor "pickle".
    """
    target = filename
    if 'path' in kwargs:
        target = kwargs.get('path') + '/' + filename

    if datatype == "pickle":
        with open(target + '.p', 'wb') as fb:
            pickle.dump(self, fb)
        return

    if datatype != "json.gz":
        raise Exception("Unknown datatype " + str(datatype))

    # Imported here rather than at module level, as in the original code.
    from .input.json import dump_to_json
    dump_to_json([self], target, description=description)

Dump the Obs to a file 'filename' of the chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
#   def export_jackknife(self):
View Source
def export_jackknife(self):
    """Export jackknife samples from the Obs

    Returns
    -------
    numpy.ndarray
        Returns a numpy array of length N + 1 where N is the number of samples
        for the given ensemble and replicum. The zeroth entry of the array contains
        the mean value of the Obs, entries 1 to N contain the N jackknife samples
        derived from the Obs. The current implementation only works for observables
        defined on exactly one ensemble and replicum. The derived jackknife samples
        should agree with samples from a full jackknife analysis up to O(1/N).

    Raises
    ------
    Exception
        if the Obs is not defined on exactly one name.
    """
    if len(self.names) != 1:
        raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

    (only_name,) = self.names
    samples = self.deltas[only_name] + self.r_values[only_name]
    n_samples = samples.size
    central = self.value

    jacks = np.empty(n_samples + 1)
    jacks[0] = central
    # Leave-one-out estimate: remove one sample from n * mean and renormalize.
    jacks[1:] = (n_samples * central - samples) / (n_samples - 1)
    return jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
#   def sqrt(self):
View Source
def sqrt(self):
    """Return the square root of the observable as a derived observable.

    The derivative 1 / (2 * sqrt(value)) is supplied manually via man_grad.
    """
    gradient = [1 / 2 / np.sqrt(self.value)]
    return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=gradient)
#   def log(self):
View Source
def log(self):
    """Return the natural logarithm of the observable as a derived observable.

    The derivative 1 / value is supplied manually via man_grad.
    """
    gradient = [1 / self.value]
    return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=gradient)
#   def exp(self):
View Source
def exp(self):
    """Return the exponential of the observable as a derived observable.

    The derivative exp(value) is supplied manually via man_grad.
    """
    gradient = [np.exp(self.value)]
    return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=gradient)
#   def sin(self):
View Source
def sin(self):
    """Return the sine of the observable as a derived observable.

    The derivative cos(value) is supplied manually via man_grad.
    """
    gradient = [np.cos(self.value)]
    return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=gradient)
#   def cos(self):
View Source
def cos(self):
    """Return the cosine of the observable as a derived observable.

    The derivative -sin(value) is supplied manually via man_grad.
    """
    gradient = [-np.sin(self.value)]
    return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=gradient)
#   def tan(self):
View Source
def tan(self):
    """Return the tangent of the observable as a derived observable.

    The derivative 1 / cos(value)**2 is supplied manually via man_grad.
    """
    gradient = [1 / np.cos(self.value) ** 2]
    return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=gradient)
#   def arcsin(self):
View Source
def arcsin(self):
    """Return the inverse sine of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arcsin(x):
        return anp.arcsin(x[0])

    return derived_observable(_arcsin, [self])
#   def arccos(self):
View Source
def arccos(self):
    """Return the inverse cosine of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arccos(x):
        return anp.arccos(x[0])

    return derived_observable(_arccos, [self])
#   def arctan(self):
View Source
def arctan(self):
    """Return the inverse tangent of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arctan(x):
        return anp.arctan(x[0])

    return derived_observable(_arctan, [self])
#   def sinh(self):
View Source
def sinh(self):
    """Return the hyperbolic sine of the observable as a derived observable.

    The derivative cosh(value) is supplied manually via man_grad.
    """
    gradient = [np.cosh(self.value)]
    return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=gradient)
#   def cosh(self):
View Source
def cosh(self):
    """Return the hyperbolic cosine of the observable as a derived observable.

    The derivative sinh(value) is supplied manually via man_grad.
    """
    gradient = [np.sinh(self.value)]
    return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=gradient)
#   def tanh(self):
View Source
def tanh(self):
    """Return the hyperbolic tangent of the observable as a derived observable.

    The derivative 1 / cosh(value)**2 is supplied manually via man_grad.
    """
    gradient = [1 / np.cosh(self.value) ** 2]
    return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=gradient)
#   def arcsinh(self):
View Source
def arcsinh(self):
    """Return the inverse hyperbolic sine of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arcsinh(x):
        return anp.arcsinh(x[0])

    return derived_observable(_arcsinh, [self])
#   def arccosh(self):
View Source
def arccosh(self):
    """Return the inverse hyperbolic cosine of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arccosh(x):
        return anp.arccosh(x[0])

    return derived_observable(_arccosh, [self])
#   def arctanh(self):
View Source
def arctanh(self):
    """Return the inverse hyperbolic tangent of the observable as a derived observable.

    No manual gradient is passed; the autograd-wrapped numpy function is
    handed to derived_observable.
    """
    def _arctanh(x):
        return anp.arctanh(x[0])

    return derived_observable(_arctanh, [self])
#   N_sigma
#   e_ddvalue
#   e_drho
#   e_dtauint
#   e_dvalue
#   e_n_dtauint
#   e_n_tauint
#   e_rho
#   e_tauint
#   e_windowsize
#   tau_exp
#   class CObs:
View Source
class CObs:
    """Class for a complex valued observable.

    Real and imaginary part are stored separately and are exposed through
    the read-only properties 'real' and 'imag'. Arithmetic is carried out
    componentwise on these two parts.
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        """Store the real and imaginary part; 'tag' is initialized to None."""
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        """Real part of the observable."""
        return self._real

    @property
    def imag(self):
        """Imaginary part of the observable."""
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        # Parts that are plain numbers carry no error and are skipped.
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part compare equal to 0.0."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Return the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            # Let numpy broadcast the operation over the array elements.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # All four parts are Obs: build real and imaginary part of the
                # product in one derived_observable call each, with the
                # analytic derivatives supplied via man_grad.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # Vanishing imaginary part: only the real part of other contributes.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # self * conj(other) / |other|^2
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # other * conj(self) / |self|^2
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # Operands without real/imag attributes (None, str, ...) cannot be
        # compared componentwise; defer to Python's default handling instead
        # of failing with an AttributeError.
        if not (hasattr(other, 'real') and hasattr(other, 'imag')):
            return NotImplemented
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # An explicit '+' is only inserted for a non-negative imaginary part.
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

#   CObs(real, imag=0.0)
View Source
def __init__(self, real, imag=0.0):
    """Store real and imaginary part of the observable; 'tag' starts as None."""
    self.tag = None
    self._imag = imag
    self._real = real
#   tag
#   real
#   imag
#   def gamma_method(self, **kwargs):
View Source
def gamma_method(self, **kwargs):
    """Run the gamma_method on the real and on the imaginary part.

    Parts that are not instances of Obs are skipped. All keyword
    arguments are forwarded unchanged.
    """
    for part in (self.real, self.imag):
        if isinstance(part, Obs):
            part.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

#   def is_zero(self):
View Source
def is_zero(self):
    """Check whether both the real and the imaginary part compare equal to 0.0."""
    real_is_zero = self.real == 0.0
    if not real_is_zero:
        return real_is_zero
    return self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

#   def conjugate(self):
View Source
def conjugate(self):
    """Return the complex conjugate as a new CObs (imaginary part negated)."""
    negated_imag = -self.imag
    return CObs(self.real, negated_imag)
#   def derived_observable(func, data, array_mode=False, **kwargs):
View Source
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
        All keyword arguments given to derived_observable (including
        man_grad and num_grad) are forwarded to func, so func has to
        accept them.
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the fluctuations and covobs gradients are collected per
        entry along the first axis of data and contracted with the
        derivative via np.tensordot instead of looping over every single
        Obs (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func returns a scalar, otherwise an object array
        of Obs with the shape of the return value of func.

    Raises
    ------
    Exception
        if the covariance matrices of covobs with the same name differ,
        if man_grad does not have the shape of the output followed by the
        shape of data, if num_grad is combined with an array valued func,
        or if the same name is used for deltas and covobs.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    # Plain int/float entries are replaced by the result of cov_Obs under a
    # dummy name so that the attribute accesses below work for every entry.
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect one covariance matrix per covobs name and make sure that all
    # inputs sharing a name agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    # Union of all names of the inputs, split into covobs names and the rest.
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # A name counts as merged / the result as reweighted as soon as one input is flagged.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Central values of the inputs in the shape of data.
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returned an array, 0 for a scalar result.
    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Inputs that do not know this name contribute their central value.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Flag the name as merged if the index lists (including the merged
            # one) do not all have the same length.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Jacobian of func at the central values: manual, numerical or via autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        # Defaults which can be overridden by kwargs of the same name.
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in providing a zero gradient for inputs without a given covobs.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        # For every name collect, per entry along the first axis of data, the
        # deltas of all contained Obs with the sample index as additional last axis.
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        # Same for the gradients of the covobs.
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per element of the result of func.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Linear error propagation: accumulate derivative times fluctuation
            # (or covobs gradient) over all inputs.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                # Only names flagged as merged are passed through _filter_zeroes.
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The Obs is created from the sample names only; covobs names and
        # objects are attached afterwards.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        # NOTE(review): every element of the result receives the same is_merged dict object.
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # A scalar result is returned as a single Obs instead of a 0-d array.
    if multi == 0:
        final_result = final_result.item()

    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

#   def reweight(weight, obs, **kwargs):
View Source
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.

    Returns
    -------
    list
        One reweighted Obs per entry of obs, in the same order.

    Raises
    ------
    Exception
        if an entry of obs contains covobs, is defined on names unknown to
        weight, or on configurations which are not part of weight.idl.
    """
    reweighted_obs = []
    for idx, current in enumerate(obs):
        # Validate the input before any samples are touched.
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (idx, name))

        ens_names = sorted(current.names)

        # Restrict the fluctuations of the weight to the configurations of the
        # current observable and form the product sample by sample.
        w_deltas = {}
        products = []
        for name in ens_names:
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            products.append((w_deltas[name] + weight.r_values[name]) * (current.deltas[name] + current.r_values[name]))
        numerator = Obs(products, list(ens_names), idl=[current.idl[name] for name in ens_names])

        if kwargs.get('all_configs'):
            denominator = weight
        else:
            reduced_weights = [w_deltas[name] + weight.r_values[name] for name in ens_names]
            denominator = Obs(reduced_weights, list(ens_names), idl=[current.idl[name] for name in ens_names])

        ratio = derived_observable(lambda x, **kwargs: x[0] / x[1], [numerator, denominator], **kwargs)
        reweighted_obs.append(ratio)
        ratio.reweighted = True
        ratio.is_merged = current.is_merged

    return reweighted_obs

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
#   def correlate(obs_a, obs_b):
View Source
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Builds a new Obs whose samples on every ensemble are the element-wise
    product of the samples (deltas + r_values) of obs_a and obs_b.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the products of the samples. Its reweighted
        flag is set if either input is flagged as reweighted and is_merged
        is the per-ensemble logical or of the inputs' flags.

    Raises
    ------
    Exception
        If the ensemble names differ, if one of the inputs contains covobs
        or if shape or idl differ on any ensemble.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # Format the ensemble name into the message; passing it as a separate
        # argument would render the message as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    # Truthiness instead of `is True`, so that flags stored as numpy booleans
    # trigger the warning as well.
    if obs_a.reweighted:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    names = sorted(obs_a.names)
    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in names]
    new_idl = [obs_a.idl[name] for name in names]

    o = Obs(new_samples, names, idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = bool(obs_a.reweighted or obs_b.reweighted)
    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

#   def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
View Source
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Compute the covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True the normalized correlation matrix is plotted (default False).
    correlation : bool
        If True the correlation matrix is returned instead of the covariance
        matrix (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.
    **kwargs
        Accepted but not used inside this function.

    Returns
    -------
    numpy.ndarray
        The correlation matrix if correlation is True, otherwise the covariance matrix.

    Notes
    -----
    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    # Warn about rank deficiency only when no covobs contribute at all.
    max_samples = np.max([o.N for o in obs])
    contains_covobs = any(len(o.cov_names) > 0 for o in obs)
    if max_samples <= dim and not contains_covobs:
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle (including the diagonal), then mirror it.
    upper = np.zeros((dim, dim))
    for row in range(dim):
        for col in range(row, dim):
            upper[row, col] = _covariance_element(obs[row], obs[col])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalize to a correlation matrix with unit diagonal.
    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    # Rescale the correlation matrix with the full errors of the observables.
    error_matrix = np.diag([o.dvalue for o in obs])
    cov = error_matrix @ corr @ error_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    return corr if correlation is True else cov

Calculates the covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation instead of the covariance is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

#   def import_jackknife(jacks, name, idl=None):
View Source
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : optional
        Forwarded unchanged as the idl argument of Obs (default None).
        NOTE(review): presumably a one-element list holding the configuration
        list of the ensemble - confirm against Obs.__init__.

    Returns
    -------
    Obs
        Observable rebuilt from the jackknife samples; its value is set to
        jacks[0].
    """
    length = len(jacks) - 1
    jack_samples = np.asarray(jacks[1:])
    # Invert the jackknife average: sample_k = sum_j jack_j - (N - 1) * jack_k.
    # This is what multiplying with the dense projector
    # ones((N, N)) - (N - 1) * identity(N) evaluates to, but it needs O(N)
    # instead of O(N^2) memory and time.
    samples = np.sum(jack_samples) - (length - 1) * jack_samples
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    # Keep the externally supplied central value instead of the sample mean.
    new_obs._value = jacks[0]
    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
#   def merge_obs(list_of_obs):
View Source
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable defined on the union of the replica of all inputs. Its
        reweighted flag is True if any input is flagged as reweighted and
        is_merged[name] is True if any input has that flag set for name.

    Raises
    ------
    Exception
        If a replicum appears in more than one input or if any input
        contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')

    # Rebuild the raw samples (fluctuations + replica means) per replicum.
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    merged = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    # Builtin any() yields plain Python bools; numpy reductions would store
    # numpy.bool_ values, for which identity checks such as
    # `obs.reweighted is True` evaluate to False.
    merged.is_merged = {name: any(oi.is_merged.get(name, False) for oi in list_of_obs) for name in merged.names}
    merged.reweighted = any(oi.reweighted for oi in list_of_obs)
    return merged

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

#   def cov_Obs(means, cov, name, grad=None):
View Source
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if exactly one mean value was provided, otherwise a
        list containing one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values differs from the N of the Covobs
        created from cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    # One Obs per mean value; pos selects the entry of cov it belongs to.
    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # Report the N of the Covobs, i.e. the value that is actually compared
    # against len(means), not the N attribute of the Obs.
    expected = ol[0].covobs[name].N
    if expected != len(means):
        raise Exception('You have to provide %d mean values!' % expected)
    if len(ol) == 1:
        return ol[0]
    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.