pyerrors.obs

   1import warnings
   2import pickle
   3from math import gcd
   4from functools import reduce
   5import numpy as np
   6import autograd.numpy as anp  # Thinly-wrapped numpy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14
  15class Obs:
  16    """Class for a general observable.
  17
  18    Instances of Obs are the basic objects of a pyerrors error analysis.
  19    They are initialized with a list which contains arrays of samples for
  20    different ensembles/replica and another list of same length which contains
  21    the names of the ensembles/replica. Mathematical operations can be
  22    performed on instances. The result is another instance of Obs. The error of
  23    an instance can be computed with the gamma_method. Also contains additional
  24    methods for output and visualization of the error calculation.
  25
  26    Attributes
  27    ----------
  28    S_global : float
  29        Standard value for S (default 2.0)
  30    S_dict : dict
  31        Dictionary for S values. If an entry for a given ensemble
  32        exists this overwrites the standard value for that ensemble.
  33    tau_exp_global : float
  34        Standard value for tau_exp (default 0.0)
  35    tau_exp_dict : dict
  36        Dictionary for tau_exp values. If an entry for a given ensemble exists
  37        this overwrites the standard value for that ensemble.
  38    N_sigma_global : float
  39        Standard value for N_sigma (default 1.0)
  40    N_sigma_dict : dict
  41        Dictionary for N_sigma values. If an entry for a given ensemble exists
  42        this overwrites the standard value for that ensemble.
  43    """
  44    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  45                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  46                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  47                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  48                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  49
  50    S_global = 2.0
  51    S_dict = {}
  52    tau_exp_global = 0.0
  53    tau_exp_dict = {}
  54    N_sigma_global = 1.0
  55    N_sigma_dict = {}
  56    filter_eps = 1e-10
  57
  58    def __init__(self, samples, names, idl=None, **kwargs):
  59        """ Initialize Obs object.
  60
  61        Parameters
  62        ----------
  63        samples : list
  64            list of numpy arrays containing the Monte Carlo samples
  65        names : list
  66            list of strings labeling the individual samples
  67        idl : list, optional
  68            list of ranges or lists on which the samples are defined
  69        """
  70
  71        if kwargs.get("means") is None and len(samples):
  72            if len(samples) != len(names):
  73                raise Exception('Length of samples and names incompatible.')
  74            if idl is not None:
  75                if len(idl) != len(names):
  76                    raise Exception('Length of idl incompatible with samples and names.')
  77            name_length = len(names)
  78            if name_length > 1:
  79                if name_length != len(set(names)):
  80                    raise Exception('names are not unique.')
  81                if not all(isinstance(x, str) for x in names):
  82                    raise TypeError('All names have to be strings.')
  83            else:
  84                if not isinstance(names[0], str):
  85                    raise TypeError('All names have to be strings.')
  86            if min(len(x) for x in samples) <= 4:
  87                raise Exception('Samples have to have at least 5 entries.')
  88
  89        self.names = sorted(names)
  90        self.shape = {}
  91        self.r_values = {}
  92        self.deltas = {}
  93        self._covobs = {}
  94
  95        self._value = 0
  96        self.N = 0
  97        self.is_merged = {}
  98        self.idl = {}
  99        if idl is not None:
 100            for name, idx in sorted(zip(names, idl)):
 101                if isinstance(idx, range):
 102                    self.idl[name] = idx
 103                elif isinstance(idx, (list, np.ndarray)):
 104                    dc = np.unique(np.diff(idx))
 105                    if np.any(dc < 0):
 106                        raise Exception("Unsorted idx for idl[%s]" % (name))
 107                    if len(dc) == 1:
 108                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 109                    else:
 110                        self.idl[name] = list(idx)
 111                else:
 112                    raise Exception('incompatible type for idl[%s].' % (name))
 113        else:
 114            for name, sample in sorted(zip(names, samples)):
 115                self.idl[name] = range(1, len(sample) + 1)
 116
 117        if kwargs.get("means") is not None:
 118            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 119                self.shape[name] = len(self.idl[name])
 120                self.N += self.shape[name]
 121                self.r_values[name] = mean
 122                self.deltas[name] = sample
 123        else:
 124            for name, sample in sorted(zip(names, samples)):
 125                self.shape[name] = len(self.idl[name])
 126                self.N += self.shape[name]
 127                if len(sample) != self.shape[name]:
 128                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 129                self.r_values[name] = np.mean(sample)
 130                self.deltas[name] = sample - self.r_values[name]
 131                self._value += self.shape[name] * self.r_values[name]
 132            self._value /= self.N
 133
 134        self._dvalue = 0.0
 135        self.ddvalue = 0.0
 136        self.reweighted = False
 137
 138        self.tag = None
 139
 140    @property
 141    def value(self):
 142        return self._value
 143
 144    @property
 145    def dvalue(self):
 146        return self._dvalue
 147
 148    @property
 149    def e_names(self):
 150        return sorted(set([o.split('|')[0] for o in self.names]))
 151
 152    @property
 153    def cov_names(self):
 154        return sorted(set([o for o in self.covobs.keys()]))
 155
 156    @property
 157    def mc_names(self):
 158        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 159
 160    @property
 161    def e_content(self):
 162        res = {}
 163        for e, e_name in enumerate(self.e_names):
 164            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 165            if e_name in self.names:
 166                res[e_name].append(e_name)
 167        return res
 168
 169    @property
 170    def covobs(self):
 171        return self._covobs
 172
 173    def gamma_method(self, **kwargs):
 174        """Estimate the error and related properties of the Obs.
 175
 176        Parameters
 177        ----------
 178        S : float
 179            specifies a custom value for the parameter S (default 2.0).
 180            If set to 0 it is assumed that the data exhibits no
 181            autocorrelation. In this case the error estimates coincides
 182            with the sample standard error.
 183        tau_exp : float
 184            positive value triggers the critical slowing down analysis
 185            (default 0.0).
 186        N_sigma : float
 187            number of standard deviations from zero until the tail is
 188            attached to the autocorrelation function (default 1).
 189        fft : bool
 190            determines whether the fft algorithm is used for the computation
 191            of the autocorrelation function (default True)
 192        """
 193
 194        e_content = self.e_content
 195        self.e_dvalue = {}
 196        self.e_ddvalue = {}
 197        self.e_tauint = {}
 198        self.e_dtauint = {}
 199        self.e_windowsize = {}
 200        self.e_n_tauint = {}
 201        self.e_n_dtauint = {}
 202        e_gamma = {}
 203        self.e_rho = {}
 204        self.e_drho = {}
 205        self._dvalue = 0
 206        self.ddvalue = 0
 207
 208        self.S = {}
 209        self.tau_exp = {}
 210        self.N_sigma = {}
 211
 212        if kwargs.get('fft') is False:
 213            fft = False
 214        else:
 215            fft = True
 216
 217        def _parse_kwarg(kwarg_name):
 218            if kwarg_name in kwargs:
 219                tmp = kwargs.get(kwarg_name)
 220                if isinstance(tmp, (int, float)):
 221                    if tmp < 0:
 222                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 223                    for e, e_name in enumerate(self.e_names):
 224                        getattr(self, kwarg_name)[e_name] = tmp
 225                else:
 226                    raise TypeError(kwarg_name + ' is not in proper format.')
 227            else:
 228                for e, e_name in enumerate(self.e_names):
 229                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 230                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 231                    else:
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 233
 234        _parse_kwarg('S')
 235        _parse_kwarg('tau_exp')
 236        _parse_kwarg('N_sigma')
 237
 238        for e, e_name in enumerate(self.mc_names):
 239            r_length = []
 240            for r_name in e_content[e_name]:
 241                if isinstance(self.idl[r_name], range):
 242                    r_length.append(len(self.idl[r_name]))
 243                else:
 244                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 245
 246            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 247            w_max = max(r_length) // 2
 248            e_gamma[e_name] = np.zeros(w_max)
 249            self.e_rho[e_name] = np.zeros(w_max)
 250            self.e_drho[e_name] = np.zeros(w_max)
 251
 252            for r_name in e_content[e_name]:
 253                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 254
 255            gamma_div = np.zeros(w_max)
 256            for r_name in e_content[e_name]:
 257                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 258            gamma_div[gamma_div < 1] = 1.0
 259            e_gamma[e_name] /= gamma_div[:w_max]
 260
 261            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 262                self.e_tauint[e_name] = 0.5
 263                self.e_dtauint[e_name] = 0.0
 264                self.e_dvalue[e_name] = 0.0
 265                self.e_ddvalue[e_name] = 0.0
 266                self.e_windowsize[e_name] = 0
 267                continue
 268
 269            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 270            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 271            # Make sure no entry of tauint is smaller than 0.5
 272            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 273            # hep-lat/0306017 eq. (42)
 274            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 275            self.e_n_dtauint[e_name][0] = 0.0
 276
 277            def _compute_drho(i):
 278                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 279                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 280
 281            _compute_drho(1)
 282            if self.tau_exp[e_name] > 0:
 283                texp = self.tau_exp[e_name]
 284                # Critical slowing down analysis
 285                if w_max // 2 <= 1:
 286                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 287                for n in range(1, w_max // 2):
 288                    _compute_drho(n + 1)
 289                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 290                        # Bias correction hep-lat/0306017 eq. (49) included
 291                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 292                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 293                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 294                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 295                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 296                        self.e_windowsize[e_name] = n
 297                        break
 298            else:
 299                if self.S[e_name] == 0.0:
 300                    self.e_tauint[e_name] = 0.5
 301                    self.e_dtauint[e_name] = 0.0
 302                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 303                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 304                    self.e_windowsize[e_name] = 0
 305                else:
 306                    # Standard automatic windowing procedure
 307                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 308                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
 309                    for n in range(1, w_max):
 310                        if n < w_max // 2 - 2:
 311                            _compute_drho(n + 1)
 312                        if g_w[n - 1] < 0 or n >= w_max - 1:
 313                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 314                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 315                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 316                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 317                            self.e_windowsize[e_name] = n
 318                            break
 319
 320            self._dvalue += self.e_dvalue[e_name] ** 2
 321            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 322
 323        for e_name in self.cov_names:
 324            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 325            self.e_ddvalue[e_name] = 0
 326            self._dvalue += self.e_dvalue[e_name]**2
 327
 328        self._dvalue = np.sqrt(self._dvalue)
 329        if self._dvalue == 0.0:
 330            self.ddvalue = 0.0
 331        else:
 332            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 333        return
 334
 335    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 336        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 337           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 338
 339        Parameters
 340        ----------
 341        deltas : list
 342            List of fluctuations
 343        idx : list
 344            List or range of configurations on which the deltas are defined.
 345        shape : int
 346            Number of configurations in idx.
 347        w_max : int
 348            Upper bound for the summation window.
 349        fft : bool
 350            determines whether the fft algorithm is used for the computation
 351            of the autocorrelation function.
 352        """
 353        gamma = np.zeros(w_max)
 354        deltas = _expand_deltas(deltas, idx, shape)
 355        new_shape = len(deltas)
 356        if fft:
 357            max_gamma = min(new_shape, w_max)
 358            # The padding for the fft has to be even
 359            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 360            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 361        else:
 362            for n in range(w_max):
 363                if new_shape - n >= 0:
 364                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 365
 366        return gamma
 367
 368    def details(self, ens_content=True):
 369        """Output detailed properties of the Obs.
 370
 371        Parameters
 372        ----------
 373        ens_content : bool
 374            print details about the ensembles and replica if true.
 375        """
 376        if self.tag is not None:
 377            print("Description:", self.tag)
 378        if not hasattr(self, 'e_dvalue'):
 379            print('Result\t %3.8e' % (self.value))
 380        else:
 381            if self.value == 0.0:
 382                percentage = np.nan
 383            else:
 384                percentage = np.abs(self._dvalue / self.value) * 100
 385            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 386            if len(self.e_names) > 1:
 387                print(' Ensemble errors:')
 388            for e_name in self.mc_names:
 389                if len(self.e_names) > 1:
 390                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 391                if self.tau_exp[e_name] > 0:
 392                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
 393                else:
 394                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
 395            for e_name in self.cov_names:
 396                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 397        if ens_content is True:
 398            if len(self.e_names) == 1:
 399                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 400            else:
 401                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 402            my_string_list = []
 403            for key, value in sorted(self.e_content.items()):
 404                if key not in self.covobs:
 405                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 406                    if len(value) == 1:
 407                        my_string += f': {self.shape[value[0]]} configurations'
 408                        if isinstance(self.idl[value[0]], range):
 409                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 410                        else:
 411                            my_string += ' (irregular range)'
 412                    else:
 413                        sublist = []
 414                        for v in value:
 415                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 416                            my_substring += f': {self.shape[v]} configurations'
 417                            if isinstance(self.idl[v], range):
 418                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 419                            else:
 420                                my_substring += ' (irregular range)'
 421                            sublist.append(my_substring)
 422
 423                        my_string += '\n' + '\n'.join(sublist)
 424                else:
 425                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 426                my_string_list.append(my_string)
 427            print('\n'.join(my_string_list))
 428
 429    def is_zero_within_error(self, sigma=1):
 430        """Checks whether the observable is zero within 'sigma' standard errors.
 431
 432        Parameters
 433        ----------
 434        sigma : int
 435            Number of standard errors used for the check.
 436
 437        Works only properly when the gamma method was run.
 438        """
 439        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 440
 441    def is_zero(self, atol=1e-10):
 442        """Checks whether the observable is zero within a given tolerance.
 443
 444        Parameters
 445        ----------
 446        atol : float
 447            Absolute tolerance (for details see numpy documentation).
 448        """
 449        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 450
 451    def plot_tauint(self, save=None):
 452        """Plot integrated autocorrelation time for each ensemble.
 453
 454        Parameters
 455        ----------
 456        save : str
 457            saves the figure to a file named 'save' if.
 458        """
 459        if not hasattr(self, 'e_dvalue'):
 460            raise Exception('Run the gamma method first.')
 461
 462        for e, e_name in enumerate(self.mc_names):
 463            fig = plt.figure()
 464            plt.xlabel(r'$W$')
 465            plt.ylabel(r'$\tau_\mathrm{int}$')
 466            length = int(len(self.e_n_tauint[e_name]))
 467            if self.tau_exp[e_name] > 0:
 468                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 469                x_help = np.arange(2 * self.tau_exp[e_name])
 470                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 471                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 472                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 473                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 474                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 475                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 476                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 477            else:
 478                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 479                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 480
 481            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 482            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 483            plt.legend()
 484            plt.xlim(-0.5, xmax)
 485            ylim = plt.ylim()
 486            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 487            plt.draw()
 488            if save:
 489                fig.savefig(save + "_" + str(e))
 490
 491    def plot_rho(self, save=None):
 492        """Plot normalized autocorrelation function time for each ensemble.
 493
 494        Parameters
 495        ----------
 496        save : str
 497            saves the figure to a file named 'save' if.
 498        """
 499        if not hasattr(self, 'e_dvalue'):
 500            raise Exception('Run the gamma method first.')
 501        for e, e_name in enumerate(self.mc_names):
 502            fig = plt.figure()
 503            plt.xlabel('W')
 504            plt.ylabel('rho')
 505            length = int(len(self.e_drho[e_name]))
 506            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 507            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 508            if self.tau_exp[e_name] > 0:
 509                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 510                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 511                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 512                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 513            else:
 514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 515                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 516            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 517            plt.xlim(-0.5, xmax)
 518            plt.draw()
 519            if save:
 520                fig.savefig(save + "_" + str(e))
 521
 522    def plot_rep_dist(self):
 523        """Plot replica distribution for each ensemble with more than one replicum."""
 524        if not hasattr(self, 'e_dvalue'):
 525            raise Exception('Run the gamma method first.')
 526        for e, e_name in enumerate(self.mc_names):
 527            if len(self.e_content[e_name]) == 1:
 528                print('No replica distribution for a single replicum (', e_name, ')')
 529                continue
 530            r_length = []
 531            sub_r_mean = 0
 532            for r, r_name in enumerate(self.e_content[e_name]):
 533                r_length.append(len(self.deltas[r_name]))
 534                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 535            e_N = np.sum(r_length)
 536            sub_r_mean /= e_N
 537            arr = np.zeros(len(self.e_content[e_name]))
 538            for r, r_name in enumerate(self.e_content[e_name]):
 539                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 540            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 541            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 542            plt.draw()
 543
 544    def plot_history(self, expand=True):
 545        """Plot derived Monte Carlo history for each ensemble
 546
 547        Parameters
 548        ----------
 549        expand : bool
 550            show expanded history for irregular Monte Carlo chains (default: True).
 551        """
 552        for e, e_name in enumerate(self.mc_names):
 553            plt.figure()
 554            r_length = []
 555            tmp = []
 556            tmp_expanded = []
 557            for r, r_name in enumerate(self.e_content[e_name]):
 558                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 559                if expand:
 560                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 561                    r_length.append(len(tmp_expanded[-1]))
 562                else:
 563                    r_length.append(len(tmp[-1]))
 564            e_N = np.sum(r_length)
 565            x = np.arange(e_N)
 566            y_test = np.concatenate(tmp, axis=0)
 567            if expand:
 568                y = np.concatenate(tmp_expanded, axis=0)
 569            else:
 570                y = y_test
 571            plt.errorbar(x, y, fmt='.', markersize=3)
 572            plt.xlim(-0.5, e_N - 0.5)
 573            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 574            plt.draw()
 575
 576    def plot_piechart(self, save=None):
 577        """Plot piechart which shows the fractional contribution of each
 578        ensemble to the error and returns a dictionary containing the fractions.
 579
 580        Parameters
 581        ----------
 582        save : str
 583            saves the figure to a file named 'save' if.
 584        """
 585        if not hasattr(self, 'e_dvalue'):
 586            raise Exception('Run the gamma method first.')
 587        if np.isclose(0.0, self._dvalue, atol=1e-15):
 588            raise Exception('Error is 0.0')
 589        labels = self.e_names
 590        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 591        fig1, ax1 = plt.subplots()
 592        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 593        ax1.axis('equal')
 594        plt.draw()
 595        if save:
 596            fig1.savefig(save)
 597
 598        return dict(zip(self.e_names, sizes))
 599
 600    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 601        """Dump the Obs to a file 'name' of chosen format.
 602
 603        Parameters
 604        ----------
 605        filename : str
 606            name of the file to be saved.
 607        datatype : str
 608            Format of the exported file. Supported formats include
 609            "json.gz" and "pickle"
 610        description : str
 611            Description for output file, only relevant for json.gz format.
 612        path : str
 613            specifies a custom path for the file (default '.')
 614        """
 615        if 'path' in kwargs:
 616            file_name = kwargs.get('path') + '/' + filename
 617        else:
 618            file_name = filename
 619
 620        if datatype == "json.gz":
 621            from .input.json import dump_to_json
 622            dump_to_json([self], file_name, description=description)
 623        elif datatype == "pickle":
 624            with open(file_name + '.p', 'wb') as fb:
 625                pickle.dump(self, fb)
 626        else:
 627            raise Exception("Unknown datatype " + str(datatype))
 628
 629    def export_jackknife(self):
 630        """Export jackknife samples from the Obs
 631
 632        Returns
 633        -------
 634        numpy.ndarray
 635            Returns a numpy array of length N + 1 where N is the number of samples
 636            for the given ensemble and replicum. The zeroth entry of the array contains
 637            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 638            derived from the Obs. The current implementation only works for observables
 639            defined on exactly one ensemble and replicum. The derived jackknife samples
 640            should agree with samples from a full jackknife analysis up to O(1/N).
 641        """
 642
 643        if len(self.names) != 1:
 644            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 645
 646        name = self.names[0]
 647        full_data = self.deltas[name] + self.r_values[name]
 648        n = full_data.size
 649        mean = self.value
 650        tmp_jacks = np.zeros(n + 1)
 651        tmp_jacks[0] = mean
 652        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 653        return tmp_jacks
 654
 655    def __float__(self):
 656        return float(self.value)
 657
 658    def __repr__(self):
 659        return 'Obs[' + str(self) + ']'
 660
 661    def __str__(self):
 662        if self._dvalue == 0.0:
 663            return str(self.value)
 664        fexp = np.floor(np.log10(self._dvalue))
 665        if fexp < 0.0:
 666            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 667        elif fexp == 0.0:
 668            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
 669        else:
 670            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
 671
 672    # Overload comparisons
 673    def __lt__(self, other):
 674        return self.value < other
 675
 676    def __le__(self, other):
 677        return self.value <= other
 678
 679    def __gt__(self, other):
 680        return self.value > other
 681
 682    def __ge__(self, other):
 683        return self.value >= other
 684
 685    def __eq__(self, other):
 686        return (self - other).is_zero()
 687
 688    def __ne__(self, other):
 689        return not (self - other).is_zero()
 690
 691    # Overload math operations
 692    def __add__(self, y):
 693        if isinstance(y, Obs):
 694            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 695        else:
 696            if isinstance(y, np.ndarray):
 697                return np.array([self + o for o in y])
 698            elif y.__class__.__name__ in ['Corr', 'CObs']:
 699                return NotImplemented
 700            else:
 701                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 702
 703    def __radd__(self, y):
 704        return self + y
 705
 706    def __mul__(self, y):
 707        if isinstance(y, Obs):
 708            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 709        else:
 710            if isinstance(y, np.ndarray):
 711                return np.array([self * o for o in y])
 712            elif isinstance(y, complex):
 713                return CObs(self * y.real, self * y.imag)
 714            elif y.__class__.__name__ in ['Corr', 'CObs']:
 715                return NotImplemented
 716            else:
 717                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 718
 719    def __rmul__(self, y):
 720        return self * y
 721
 722    def __sub__(self, y):
 723        if isinstance(y, Obs):
 724            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 725        else:
 726            if isinstance(y, np.ndarray):
 727                return np.array([self - o for o in y])
 728            elif y.__class__.__name__ in ['Corr', 'CObs']:
 729                return NotImplemented
 730            else:
 731                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 732
 733    def __rsub__(self, y):
 734        return -1 * (self - y)
 735
 736    def __pos__(self):
 737        return self
 738
 739    def __neg__(self):
 740        return -1 * self
 741
 742    def __truediv__(self, y):
 743        if isinstance(y, Obs):
 744            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 745        else:
 746            if isinstance(y, np.ndarray):
 747                return np.array([self / o for o in y])
 748            elif y.__class__.__name__ in ['Corr', 'CObs']:
 749                return NotImplemented
 750            else:
 751                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 752
 753    def __rtruediv__(self, y):
 754        if isinstance(y, Obs):
 755            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 756        else:
 757            if isinstance(y, np.ndarray):
 758                return np.array([o / self for o in y])
 759            elif y.__class__.__name__ in ['Corr', 'CObs']:
 760                return NotImplemented
 761            else:
 762                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 763
 764    def __pow__(self, y):
 765        if isinstance(y, Obs):
 766            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 767        else:
 768            return derived_observable(lambda x: x[0] ** y, [self])
 769
 770    def __rpow__(self, y):
 771        if isinstance(y, Obs):
 772            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 773        else:
 774            return derived_observable(lambda x: y ** x[0], [self])
 775
 776    def __abs__(self):
 777        return derived_observable(lambda x: anp.abs(x[0]), [self])
 778
 779    # Overload numpy functions
 780    def sqrt(self):
 781        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 782
 783    def log(self):
 784        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 785
 786    def exp(self):
 787        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 788
 789    def sin(self):
 790        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 791
 792    def cos(self):
 793        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 794
 795    def tan(self):
 796        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 797
 798    def arcsin(self):
 799        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 800
 801    def arccos(self):
 802        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 803
 804    def arctan(self):
 805        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 806
 807    def sinh(self):
 808        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 809
 810    def cosh(self):
 811        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 812
 813    def tanh(self):
 814        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 815
 816    def arcsinh(self):
 817        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 818
 819    def arccosh(self):
 820        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 821
 822    def arctanh(self):
 823        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 824
 825
 826class CObs:
 827    """Class for a complex valued observable."""
 828    __slots__ = ['_real', '_imag', 'tag']
 829
 830    def __init__(self, real, imag=0.0):
 831        self._real = real
 832        self._imag = imag
 833        self.tag = None
 834
 835    @property
 836    def real(self):
 837        return self._real
 838
 839    @property
 840    def imag(self):
 841        return self._imag
 842
 843    def gamma_method(self, **kwargs):
 844        """Executes the gamma_method for the real and the imaginary part."""
 845        if isinstance(self.real, Obs):
 846            self.real.gamma_method(**kwargs)
 847        if isinstance(self.imag, Obs):
 848            self.imag.gamma_method(**kwargs)
 849
 850    def is_zero(self):
 851        """Checks whether both real and imaginary part are zero within machine precision."""
 852        return self.real == 0.0 and self.imag == 0.0
 853
 854    def conjugate(self):
 855        return CObs(self.real, -self.imag)
 856
 857    def __add__(self, other):
 858        if isinstance(other, np.ndarray):
 859            return other + self
 860        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 861            return CObs(self.real + other.real,
 862                        self.imag + other.imag)
 863        else:
 864            return CObs(self.real + other, self.imag)
 865
 866    def __radd__(self, y):
 867        return self + y
 868
 869    def __sub__(self, other):
 870        if isinstance(other, np.ndarray):
 871            return -1 * (other - self)
 872        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 873            return CObs(self.real - other.real, self.imag - other.imag)
 874        else:
 875            return CObs(self.real - other, self.imag)
 876
 877    def __rsub__(self, other):
 878        return -1 * (self - other)
 879
 880    def __mul__(self, other):
 881        if isinstance(other, np.ndarray):
 882            return other * self
 883        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 884            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 885                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 886                                               [self.real, other.real, self.imag, other.imag],
 887                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 888                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 889                                               [self.real, other.real, self.imag, other.imag],
 890                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 891            elif getattr(other, 'imag', 0) != 0:
 892                return CObs(self.real * other.real - self.imag * other.imag,
 893                            self.imag * other.real + self.real * other.imag)
 894            else:
 895                return CObs(self.real * other.real, self.imag * other.real)
 896        else:
 897            return CObs(self.real * other, self.imag * other)
 898
 899    def __rmul__(self, other):
 900        return self * other
 901
 902    def __truediv__(self, other):
 903        if isinstance(other, np.ndarray):
 904            return 1 / (other / self)
 905        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 906            r = other.real ** 2 + other.imag ** 2
 907            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 908        else:
 909            return CObs(self.real / other, self.imag / other)
 910
 911    def __rtruediv__(self, other):
 912        r = self.real ** 2 + self.imag ** 2
 913        if hasattr(other, 'real') and hasattr(other, 'imag'):
 914            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 915        else:
 916            return CObs(self.real * other / r, -self.imag * other / r)
 917
 918    def __abs__(self):
 919        return np.sqrt(self.real**2 + self.imag**2)
 920
 921    def __pos__(self):
 922        return self
 923
 924    def __neg__(self):
 925        return -1 * self
 926
 927    def __eq__(self, other):
 928        return self.real == other.real and self.imag == other.imag
 929
 930    def __str__(self):
 931        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 932
 933    def __repr__(self):
 934        return 'CObs[' + str(self) + ']'
 935
 936
 937def _expand_deltas(deltas, idx, shape):
 938    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 939       If idx is of type range, the deltas are not changed
 940
 941    Parameters
 942    ----------
 943    deltas : list
 944        List of fluctuations
 945    idx : list
 946        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
 947    shape : int
 948        Number of configs in idx.
 949    """
 950    if isinstance(idx, range):
 951        return deltas
 952    else:
 953        ret = np.zeros(idx[-1] - idx[0] + 1)
 954        for i in range(shape):
 955            ret[idx[i] - idx[0]] = deltas[i]
 956        return ret
 957
 958
 959def _merge_idx(idl):
 960    """Returns the union of all lists in idl as sorted list
 961
 962    Parameters
 963    ----------
 964    idl : list
 965        List of lists or ranges.
 966    """
 967
 968    # Use groupby to efficiently check whether all elements of idl are identical
 969    try:
 970        g = groupby(idl)
 971        if next(g, True) and not next(g, False):
 972            return idl[0]
 973    except Exception:
 974        pass
 975
 976    if np.all([type(idx) is range for idx in idl]):
 977        if len(set([idx[0] for idx in idl])) == 1:
 978            idstart = min([idx.start for idx in idl])
 979            idstop = max([idx.stop for idx in idl])
 980            idstep = min([idx.step for idx in idl])
 981            return range(idstart, idstop, idstep)
 982
 983    return sorted(set().union(*idl))
 984
 985
 986def _intersection_idx(idl):
 987    """Returns the intersection of all lists in idl as sorted list
 988
 989    Parameters
 990    ----------
 991    idl : list
 992        List of lists or ranges.
 993    """
 994
 995    def _lcm(*args):
 996        """Returns the lowest common multiple of args.
 997
 998        From python 3.9 onwards the math library contains an lcm function."""
 999        return reduce(lambda a, b: a * b // gcd(a, b), args)
1000
1001    # Use groupby to efficiently check whether all elements of idl are identical
1002    try:
1003        g = groupby(idl)
1004        if next(g, True) and not next(g, False):
1005            return idl[0]
1006    except Exception:
1007        pass
1008
1009    if np.all([type(idx) is range for idx in idl]):
1010        if len(set([idx[0] for idx in idl])) == 1:
1011            idstart = max([idx.start for idx in idl])
1012            idstop = min([idx.stop for idx in idl])
1013            idstep = _lcm(*[idx.step for idx in idl])
1014            return range(idstart, idstop, idstep)
1015
1016    return sorted(set.intersection(*[set(o) for o in idl]))
1017
1018
1019def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1020    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1021       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1022       common divisor of the step sizes is used as new step size.
1023
1024    Parameters
1025    ----------
1026    deltas : list
1027        List of fluctuations
1028    idx : list
1029        List or range of configs on which the deltas are defined.
1030        Has to be a subset of new_idx and has to be sorted in ascending order.
1031    shape : list
1032        Number of configs in idx.
1033    new_idx : list
1034        List of configs that defines the new range, has to be sorted in ascending order.
1035    """
1036
1037    if type(idx) is range and type(new_idx) is range:
1038        if idx == new_idx:
1039            return deltas
1040    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1041    for i in range(shape):
1042        ret[idx[i] - new_idx[0]] = deltas[i]
1043    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1044
1045
1046def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
1047    """Collapse deltas defined on idx to the list of configs that is defined by new_idx.
1048       If idx and new_idx are of type range, the smallest
1049       common divisor of the step sizes is used as new step size.
1050
1051    Parameters
1052    ----------
1053    deltas : list
1054        List of fluctuations
1055    idx : list
1056        List or range of configs on which the deltas are defined.
1057        Has to be a subset of new_idx and has to be sorted in ascending order.
1058    shape : list
1059        Number of configs in idx.
1060    new_idx : list
1061        List of configs that defines the new range, has to be sorted in ascending order.
1062    """
1063
1064    if type(idx) is range and type(new_idx) is range:
1065        if idx == new_idx:
1066            return deltas
1067    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1068    for i in range(shape):
1069        if idx[i] in new_idx:
1070            ret[idx[i] - new_idx[0]] = deltas[i]
1071    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1072
1073
1074def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1075    """Filter out all configurations with vanishing fluctuation such that they do not
1076       contribute to the error estimate anymore. Returns the new deltas and
1077       idx according to the filtering.
1078       A fluctuation is considered to be vanishing, if it is smaller than eps times
1079       the mean of the absolute values of all deltas in one list.
1080
1081    Parameters
1082    ----------
1083    deltas : list
1084        List of fluctuations
1085    idx : list
1086        List or ranges of configs on which the deltas are defined.
1087    eps : float
1088        Prefactor that enters the filter criterion.
1089    """
1090    new_deltas = []
1091    new_idx = []
1092    maxd = np.mean(np.fabs(deltas))
1093    for i in range(len(deltas)):
1094        if abs(deltas[i]) > eps * maxd:
1095            new_deltas.append(deltas[i])
1096            new_idx.append(idx[i])
1097    if new_idx:
1098        return np.array(new_deltas), new_idx
1099    else:
1100        return deltas, idx
1101
1102
1103def derived_observable(func, data, array_mode=False, **kwargs):
1104    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1105
1106    Parameters
1107    ----------
1108    func : object
1109        arbitrary function of the form func(data, **kwargs). For the
1110        automatic differentiation to work, all numpy functions have to have
1111        the autograd wrapper (use 'import autograd.numpy as anp').
1112    data : list
1113        list of Obs, e.g. [obs1, obs2, obs3].
1114    num_grad : bool
1115        if True, numerical derivatives are used instead of autograd
1116        (default False). To control the numerical differentiation the
1117        kwargs of numdifftools.step_generators.MaxStepGenerator
1118        can be used.
1119    man_grad : list
1120        manually supply a list or an array which contains the jacobian
1121        of func. Use cautiously, supplying the wrong derivative will
1122        not be intercepted.
1123
1124    Notes
1125    -----
1126    For simple mathematical operations it can be practical to use anonymous
1127    functions. For the ratio of two observables one can e.g. use
1128
1129    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1130    """
1131
1132    data = np.asarray(data)
1133    raveled_data = data.ravel()
1134
1135    # Workaround for matrix operations containing non Obs data
1136    if not all(isinstance(x, Obs) for x in raveled_data):
1137        for i in range(len(raveled_data)):
1138            if isinstance(raveled_data[i], (int, float)):
1139                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1140
1141    allcov = {}
1142    for o in raveled_data:
1143        for name in o.cov_names:
1144            if name in allcov:
1145                if not np.allclose(allcov[name], o.covobs[name].cov):
1146                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1147            else:
1148                allcov[name] = o.covobs[name].cov
1149
1150    n_obs = len(raveled_data)
1151    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1152    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1153    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1154
1155    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1156    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1157
1158    if data.ndim == 1:
1159        values = np.array([o.value for o in data])
1160    else:
1161        values = np.vectorize(lambda x: x.value)(data)
1162
1163    new_values = func(values, **kwargs)
1164
1165    multi = int(isinstance(new_values, np.ndarray))
1166
1167    new_r_values = {}
1168    new_idl_d = {}
1169    for name in new_sample_names:
1170        idl = []
1171        tmp_values = np.zeros(n_obs)
1172        for i, item in enumerate(raveled_data):
1173            tmp_values[i] = item.r_values.get(name, item.value)
1174            tmp_idl = item.idl.get(name)
1175            if tmp_idl is not None:
1176                idl.append(tmp_idl)
1177        if multi > 0:
1178            tmp_values = np.array(tmp_values).reshape(data.shape)
1179        new_r_values[name] = func(tmp_values, **kwargs)
1180        new_idl_d[name] = _merge_idx(idl)
1181        if not is_merged[name]:
1182            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1183
1184    if 'man_grad' in kwargs:
1185        deriv = np.asarray(kwargs.get('man_grad'))
1186        if new_values.shape + data.shape != deriv.shape:
1187            raise Exception('Manual derivative does not have correct shape.')
1188    elif kwargs.get('num_grad') is True:
1189        if multi > 0:
1190            raise Exception('Multi mode currently not supported for numerical derivative')
1191        options = {
1192            'base_step': 0.1,
1193            'step_ratio': 2.5}
1194        for key in options.keys():
1195            kwarg = kwargs.get(key)
1196            if kwarg is not None:
1197                options[key] = kwarg
1198        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1199        if tmp_df.size == 1:
1200            deriv = np.array([tmp_df.real])
1201        else:
1202            deriv = tmp_df.real
1203    else:
1204        deriv = jacobian(func)(values, **kwargs)
1205
1206    final_result = np.zeros(new_values.shape, dtype=object)
1207
1208    if array_mode is True:
1209
1210        class _Zero_grad():
1211            def __init__(self, N):
1212                self.grad = np.zeros((N, 1))
1213
1214        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1215        d_extracted = {}
1216        g_extracted = {}
1217        for name in new_sample_names:
1218            d_extracted[name] = []
1219            ens_length = len(new_idl_d[name])
1220            for i_dat, dat in enumerate(data):
1221                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1222        for name in new_cov_names:
1223            g_extracted[name] = []
1224            zero_grad = _Zero_grad(new_covobs_lengths[name])
1225            for i_dat, dat in enumerate(data):
1226                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1227
1228    for i_val, new_val in np.ndenumerate(new_values):
1229        new_deltas = {}
1230        new_grad = {}
1231        if array_mode is True:
1232            for name in new_sample_names:
1233                ens_length = d_extracted[name][0].shape[-1]
1234                new_deltas[name] = np.zeros(ens_length)
1235                for i_dat, dat in enumerate(d_extracted[name]):
1236                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1237            for name in new_cov_names:
1238                new_grad[name] = 0
1239                for i_dat, dat in enumerate(g_extracted[name]):
1240                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1241        else:
1242            for j_obs, obs in np.ndenumerate(data):
1243                for name in obs.names:
1244                    if name in obs.cov_names:
1245                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1246                    else:
1247                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1248
1249        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1250
1251        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1252            raise Exception('The same name has been used for deltas and covobs!')
1253        new_samples = []
1254        new_means = []
1255        new_idl = []
1256        new_names_obs = []
1257        for name in new_names:
1258            if name not in new_covobs:
1259                if is_merged[name]:
1260                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1261                else:
1262                    filtered_deltas = new_deltas[name]
1263                    filtered_idl_d = new_idl_d[name]
1264
1265                new_samples.append(filtered_deltas)
1266                new_idl.append(filtered_idl_d)
1267                new_means.append(new_r_values[name][i_val])
1268                new_names_obs.append(name)
1269        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1270        for name in new_covobs:
1271            final_result[i_val].names.append(name)
1272        final_result[i_val]._covobs = new_covobs
1273        final_result[i_val]._value = new_val
1274        final_result[i_val].is_merged = is_merged
1275        final_result[i_val].reweighted = reweighted
1276
1277    if multi == 0:
1278        final_result = final_result.item()
1279
1280    return final_result
1281
1282
1283def _reduce_deltas(deltas, idx_old, idx_new):
1284    """Extract deltas defined on idx_old on all configs of idx_new.
1285
1286    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1287    are ordered in an ascending order.
1288
1289    Parameters
1290    ----------
1291    deltas : list
1292        List of fluctuations
1293    idx_old : list
1294        List or range of configs on which the deltas are defined
1295    idx_new : list
1296        List of configs for which we want to extract the deltas.
1297        Has to be a subset of idx_old.
1298    """
1299    if not len(deltas) == len(idx_old):
1300        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1301    if type(idx_old) is range and type(idx_new) is range:
1302        if idx_old == idx_new:
1303            return deltas
1304    shape = len(idx_new)
1305    ret = np.zeros(shape)
1306    oldpos = 0
1307    for i in range(shape):
1308        pos = -1
1309        for j in range(oldpos, len(idx_old)):
1310            if idx_old[j] == idx_new[i]:
1311                pos = j
1312                break
1313        if pos < 0:
1314            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
1315        ret[i] = deltas[pos]
1316        oldpos = pos
1317    return np.array(ret)
1318
1319
1320def reweight(weight, obs, **kwargs):
1321    """Reweight a list of observables.
1322
1323    Parameters
1324    ----------
1325    weight : Obs
1326        Reweighting factor. An Observable that has to be defined on a superset of the
1327        configurations in obs[i].idl for all i.
1328    obs : list
1329        list of Obs, e.g. [obs1, obs2, obs3].
1330    all_configs : bool
1331        if True, the reweighted observables are normalized by the average of
1332        the reweighting factor on all configurations in weight.idl and not
1333        on the configurations in obs[i].idl.
1334    """
1335    result = []
1336    for i in range(len(obs)):
1337        if len(obs[i].cov_names):
1338            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1339        if not set(obs[i].names).issubset(weight.names):
1340            raise Exception('Error: Ensembles do not fit')
1341        for name in obs[i].names:
1342            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1343                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1344        new_samples = []
1345        w_deltas = {}
1346        for name in sorted(obs[i].names):
1347            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1348            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1349        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1350
1351        if kwargs.get('all_configs'):
1352            new_weight = weight
1353        else:
1354            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1355
1356        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
1357        result[-1].reweighted = True
1358        result[-1].is_merged = obs[i].is_merged
1359
1360    return result
1361
1362
1363def correlate(obs_a, obs_b):
1364    """Correlate two observables.
1365
1366    Parameters
1367    ----------
1368    obs_a : Obs
1369        First observable
1370    obs_b : Obs
1371        Second observable
1372
1373    Notes
1374    -----
1375    Keep in mind to only correlate primary observables which have not been reweighted
1376    yet. The reweighting has to be applied after correlating the observables.
1377    Currently only works if ensembles are identical (this is not strictly necessary).
1378    """
1379
1380    if sorted(obs_a.names) != sorted(obs_b.names):
1381        raise Exception('Ensembles do not fit')
1382    if len(obs_a.cov_names) or len(obs_b.cov_names):
1383        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1384    for name in obs_a.names:
1385        if obs_a.shape[name] != obs_b.shape[name]:
1386            raise Exception('Shapes of ensemble', name, 'do not fit')
1387        if obs_a.idl[name] != obs_b.idl[name]:
1388            raise Exception('idl of ensemble', name, 'do not fit')
1389
1390    if obs_a.reweighted is True:
1391        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1392    if obs_b.reweighted is True:
1393        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1394
1395    new_samples = []
1396    new_idl = []
1397    for name in sorted(obs_a.names):
1398        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1399        new_idl.append(obs_a.idl[name])
1400
1401    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1402    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1403    o.reweighted = obs_a.reweighted or obs_b.reweighted
1404    return o
1405
1406
1407def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1408    r'''Calculates the error covariance matrix of a set of observables.
1409
1410    The gamma method has to be applied first to all observables.
1411
1412    Parameters
1413    ----------
1414    obs : list or numpy.ndarray
1415        List or one dimensional array of Obs
1416    visualize : bool
1417        If True plots the corresponding normalized correlation matrix (default False).
1418    correlation : bool
1419        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1420    smooth : None or int
1421        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1422        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1423        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1424        small ones.
1425
1426    Notes
1427    -----
1428    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1429    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1430    in the absence of autocorrelation.
1431    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1432    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1433    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1434    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1435    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1436    '''
1437
1438    length = len(obs)
1439
1440    max_samples = np.max([o.N for o in obs])
1441    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1442        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1443
1444    cov = np.zeros((length, length))
1445    for i in range(length):
1446        for j in range(i, length):
1447            cov[i, j] = _covariance_element(obs[i], obs[j])
1448    cov = cov + cov.T - np.diag(np.diag(cov))
1449
1450    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1451
1452    if isinstance(smooth, int):
1453        corr = _smooth_eigenvalues(corr, smooth)
1454
1455    if visualize:
1456        plt.matshow(corr, vmin=-1, vmax=1)
1457        plt.set_cmap('RdBu')
1458        plt.colorbar()
1459        plt.draw()
1460
1461    if correlation is True:
1462        return corr
1463
1464    errors = [o.dvalue for o in obs]
1465    cov = np.diag(errors) @ corr @ np.diag(errors)
1466
1467    eigenvalues = np.linalg.eigh(cov)[0]
1468    if not np.all(eigenvalues >= 0):
1469        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1470
1471    return cov
1472
1473
1474def _smooth_eigenvalues(corr, E):
1475    """Eigenvalue smoothing as described in hep-lat/9412087
1476
1477    corr : np.ndarray
1478        correlation matrix
1479    E : integer
1480        Number of eigenvalues to be left substantially unchanged
1481    """
1482    if not (2 < E < corr.shape[0] - 1):
1483        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1484    vals, vec = np.linalg.eigh(corr)
1485    lambda_min = np.mean(vals[:-E])
1486    vals[vals < lambda_min] = lambda_min
1487    vals /= np.mean(vals)
1488    return vec @ np.diag(vals) @ vec.T
1489
1490
1491def _covariance_element(obs1, obs2):
1492    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1493
1494    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1495        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
1496        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
1497        return np.sum(deltas1 * deltas2)
1498
1499    if set(obs1.names).isdisjoint(set(obs2.names)):
1500        return 0.0
1501
1502    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1503        raise Exception('The gamma method has to be applied to both Obs first.')
1504
1505    dvalue = 0.0
1506
1507    for e_name in obs1.mc_names:
1508
1509        if e_name not in obs2.mc_names:
1510            continue
1511
1512        idl_d = {}
1513        for r_name in obs1.e_content[e_name]:
1514            if r_name not in obs2.e_content[e_name]:
1515                continue
1516            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1517
1518        gamma = 0.0
1519
1520        for r_name in obs1.e_content[e_name]:
1521            if r_name not in obs2.e_content[e_name]:
1522                continue
1523            if len(idl_d[r_name]) == 0:
1524                continue
1525            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1526
1527        if gamma == 0.0:
1528            continue
1529
1530        gamma_div = 0.0
1531        for r_name in obs1.e_content[e_name]:
1532            if r_name not in obs2.e_content[e_name]:
1533                continue
1534            if len(idl_d[r_name]) == 0:
1535                continue
1536            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1537        gamma /= gamma_div
1538
1539        dvalue += gamma
1540
1541    for e_name in obs1.cov_names:
1542
1543        if e_name not in obs2.cov_names:
1544            continue
1545
1546        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1547
1548    return dvalue
1549
1550
1551def import_jackknife(jacks, name, idl=None):
1552    """Imports jackknife samples and returns an Obs
1553
1554    Parameters
1555    ----------
1556    jacks : numpy.ndarray
1557        numpy array containing the mean value as zeroth entry and
1558        the N jackknife samples as first to Nth entry.
1559    name : str
1560        name of the ensemble the samples are defined on.
1561    """
1562    length = len(jacks) - 1
1563    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1564    samples = jacks[1:] @ prj
1565    mean = np.mean(samples)
1566    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1567    new_obs._value = jacks[0]
1568    return new_obs
1569
1570
1571def merge_obs(list_of_obs):
1572    """Combine all observables in list_of_obs into one new observable
1573
1574    Parameters
1575    ----------
1576    list_of_obs : list
1577        list of the Obs object to be combined
1578
1579    Notes
1580    -----
1581    It is not possible to combine obs which are based on the same replicum
1582    """
1583    replist = [item for obs in list_of_obs for item in obs.names]
1584    if (len(replist) == len(set(replist))) is False:
1585        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1586    if any([len(o.cov_names) for o in list_of_obs]):
1587        raise Exception('Not possible to merge data that contains covobs!')
1588    new_dict = {}
1589    idl_dict = {}
1590    for o in list_of_obs:
1591        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1592                        for key in set(o.deltas) | set(o.r_values)})
1593        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1594
1595    names = sorted(new_dict.keys())
1596    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1597    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1598    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1599    return o
1600
1601
1602def cov_Obs(means, cov, name, grad=None):
1603    """Create an Obs based on mean(s) and a covariance matrix
1604
1605    Parameters
1606    ----------
1607    mean : list of floats or float
1608        N mean value(s) of the new Obs
1609    cov : list or array
1610        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1611    name : str
1612        identifier for the covariance matrix
1613    grad : list or array
1614        Gradient of the Covobs wrt. the means belonging to cov.
1615    """
1616
1617    def covobs_to_obs(co):
1618        """Make an Obs out of a Covobs
1619
1620        Parameters
1621        ----------
1622        co : Covobs
1623            Covobs to be embedded into the Obs
1624        """
1625        o = Obs([], [], means=[])
1626        o._value = co.value
1627        o.names.append(co.name)
1628        o._covobs[co.name] = co
1629        o._dvalue = np.sqrt(co.errsq())
1630        return o
1631
1632    ol = []
1633    if isinstance(means, (float, int)):
1634        means = [means]
1635
1636    for i in range(len(means)):
1637        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1638    if ol[0].covobs[name].N != len(means):
1639        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1640    if len(ol) == 1:
1641        return ol[0]
1642    return ol
class Obs:
 16class Obs:
 17    """Class for a general observable.
 18
 19    Instances of Obs are the basic objects of a pyerrors error analysis.
 20    They are initialized with a list which contains arrays of samples for
 21    different ensembles/replica and another list of same length which contains
 22    the names of the ensembles/replica. Mathematical operations can be
 23    performed on instances. The result is another instance of Obs. The error of
 24    an instance can be computed with the gamma_method. Also contains additional
 25    methods for output and visualization of the error calculation.
 26
 27    Attributes
 28    ----------
 29    S_global : float
 30        Standard value for S (default 2.0)
 31    S_dict : dict
 32        Dictionary for S values. If an entry for a given ensemble
 33        exists this overwrites the standard value for that ensemble.
 34    tau_exp_global : float
 35        Standard value for tau_exp (default 0.0)
 36    tau_exp_dict : dict
 37        Dictionary for tau_exp values. If an entry for a given ensemble exists
 38        this overwrites the standard value for that ensemble.
 39    N_sigma_global : float
 40        Standard value for N_sigma (default 1.0)
 41    N_sigma_dict : dict
 42        Dictionary for N_sigma values. If an entry for a given ensemble exists
 43        this overwrites the standard value for that ensemble.
 44    """
 45    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 46                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 47                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 48                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 49                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 50
 51    S_global = 2.0
 52    S_dict = {}
 53    tau_exp_global = 0.0
 54    tau_exp_dict = {}
 55    N_sigma_global = 1.0
 56    N_sigma_dict = {}
 57    filter_eps = 1e-10
 58
 59    def __init__(self, samples, names, idl=None, **kwargs):
 60        """ Initialize Obs object.
 61
 62        Parameters
 63        ----------
 64        samples : list
 65            list of numpy arrays containing the Monte Carlo samples
 66        names : list
 67            list of strings labeling the individual samples
 68        idl : list, optional
 69            list of ranges or lists on which the samples are defined
 70        """
 71
 72        if kwargs.get("means") is None and len(samples):
 73            if len(samples) != len(names):
 74                raise Exception('Length of samples and names incompatible.')
 75            if idl is not None:
 76                if len(idl) != len(names):
 77                    raise Exception('Length of idl incompatible with samples and names.')
 78            name_length = len(names)
 79            if name_length > 1:
 80                if name_length != len(set(names)):
 81                    raise Exception('names are not unique.')
 82                if not all(isinstance(x, str) for x in names):
 83                    raise TypeError('All names have to be strings.')
 84            else:
 85                if not isinstance(names[0], str):
 86                    raise TypeError('All names have to be strings.')
 87            if min(len(x) for x in samples) <= 4:
 88                raise Exception('Samples have to have at least 5 entries.')
 89
 90        self.names = sorted(names)
 91        self.shape = {}
 92        self.r_values = {}
 93        self.deltas = {}
 94        self._covobs = {}
 95
 96        self._value = 0
 97        self.N = 0
 98        self.is_merged = {}
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise Exception("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise Exception('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None
140
141    @property
142    def value(self):
143        return self._value
144
145    @property
146    def dvalue(self):
147        return self._dvalue
148
149    @property
150    def e_names(self):
151        return sorted(set([o.split('|')[0] for o in self.names]))
152
153    @property
154    def cov_names(self):
155        return sorted(set([o for o in self.covobs.keys()]))
156
157    @property
158    def mc_names(self):
159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
160
161    @property
162    def e_content(self):
163        res = {}
164        for e, e_name in enumerate(self.e_names):
165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
166            if e_name in self.names:
167                res[e_name].append(e_name)
168        return res
169
170    @property
171    def covobs(self):
172        return self._covobs
173
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            r_length = []
241            for r_name in e_content[e_name]:
242                if isinstance(self.idl[r_name], range):
243                    r_length.append(len(self.idl[r_name]))
244                else:
245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
246
247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
248            w_max = max(r_length) // 2
249            e_gamma[e_name] = np.zeros(w_max)
250            self.e_rho[e_name] = np.zeros(w_max)
251            self.e_drho[e_name] = np.zeros(w_max)
252
253            for r_name in e_content[e_name]:
254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
255
256            gamma_div = np.zeros(w_max)
257            for r_name in e_content[e_name]:
258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
259            gamma_div[gamma_div < 1] = 1.0
260            e_gamma[e_name] /= gamma_div[:w_max]
261
262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
263                self.e_tauint[e_name] = 0.5
264                self.e_dtauint[e_name] = 0.0
265                self.e_dvalue[e_name] = 0.0
266                self.e_ddvalue[e_name] = 0.0
267                self.e_windowsize[e_name] = 0
268                continue
269
270            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
271            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
272            # Make sure no entry of tauint is smaller than 0.5
273            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
274            # hep-lat/0306017 eq. (42)
275            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
276            self.e_n_dtauint[e_name][0] = 0.0
277
278            def _compute_drho(i):
279                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
280                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
281
282            _compute_drho(1)
283            if self.tau_exp[e_name] > 0:
284                texp = self.tau_exp[e_name]
285                # Critical slowing down analysis
286                if w_max // 2 <= 1:
287                    raise Exception("Need at least 8 samples for tau_exp error analysis")
288                for n in range(1, w_max // 2):
289                    _compute_drho(n + 1)
290                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
291                        # Bias correction hep-lat/0306017 eq. (49) included
292                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
293                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
294                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
295                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
296                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
297                        self.e_windowsize[e_name] = n
298                        break
299            else:
300                if self.S[e_name] == 0.0:
301                    self.e_tauint[e_name] = 0.5
302                    self.e_dtauint[e_name] = 0.0
303                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
304                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
305                    self.e_windowsize[e_name] = 0
306                else:
307                    # Standard automatic windowing procedure
308                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
309                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
310                    for n in range(1, w_max):
311                        if n < w_max // 2 - 2:
312                            _compute_drho(n + 1)
313                        if g_w[n - 1] < 0 or n >= w_max - 1:
314                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
315                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
316                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
317                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
318                            self.e_windowsize[e_name] = n
319                            break
320
321            self._dvalue += self.e_dvalue[e_name] ** 2
322            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
323
324        for e_name in self.cov_names:
325            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
326            self.e_ddvalue[e_name] = 0
327            self._dvalue += self.e_dvalue[e_name]**2
328
329        self._dvalue = np.sqrt(self._dvalue)
330        if self._dvalue == 0.0:
331            self.ddvalue = 0.0
332        else:
333            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
334        return
335
336    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
337        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
338           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
339
340        Parameters
341        ----------
342        deltas : list
343            List of fluctuations
344        idx : list
345            List or range of configurations on which the deltas are defined.
346        shape : int
347            Number of configurations in idx.
348        w_max : int
349            Upper bound for the summation window.
350        fft : bool
351            determines whether the fft algorithm is used for the computation
352            of the autocorrelation function.
353        """
354        gamma = np.zeros(w_max)
355        deltas = _expand_deltas(deltas, idx, shape)
356        new_shape = len(deltas)
357        if fft:
358            max_gamma = min(new_shape, w_max)
359            # The padding for the fft has to be even
360            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
361            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
362        else:
363            for n in range(w_max):
364                if new_shape - n >= 0:
365                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
366
367        return gamma
368
369    def details(self, ens_content=True):
370        """Output detailed properties of the Obs.
371
372        Parameters
373        ----------
374        ens_content : bool
375            print details about the ensembles and replica if true.
376        """
377        if self.tag is not None:
378            print("Description:", self.tag)
379        if not hasattr(self, 'e_dvalue'):
380            print('Result\t %3.8e' % (self.value))
381        else:
382            if self.value == 0.0:
383                percentage = np.nan
384            else:
385                percentage = np.abs(self._dvalue / self.value) * 100
386            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
387            if len(self.e_names) > 1:
388                print(' Ensemble errors:')
389            for e_name in self.mc_names:
390                if len(self.e_names) > 1:
391                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
392                if self.tau_exp[e_name] > 0:
393                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
394                else:
395                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
396            for e_name in self.cov_names:
397                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
398        if ens_content is True:
399            if len(self.e_names) == 1:
400                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
401            else:
402                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
403            my_string_list = []
404            for key, value in sorted(self.e_content.items()):
405                if key not in self.covobs:
406                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
407                    if len(value) == 1:
408                        my_string += f': {self.shape[value[0]]} configurations'
409                        if isinstance(self.idl[value[0]], range):
410                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
411                        else:
412                            my_string += ' (irregular range)'
413                    else:
414                        sublist = []
415                        for v in value:
416                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
417                            my_substring += f': {self.shape[v]} configurations'
418                            if isinstance(self.idl[v], range):
419                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
420                            else:
421                                my_substring += ' (irregular range)'
422                            sublist.append(my_substring)
423
424                        my_string += '\n' + '\n'.join(sublist)
425                else:
426                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
427                my_string_list.append(my_string)
428            print('\n'.join(my_string_list))
429
430    def is_zero_within_error(self, sigma=1):
431        """Checks whether the observable is zero within 'sigma' standard errors.
432
433        Parameters
434        ----------
435        sigma : int
436            Number of standard errors used for the check.
437
438        Works only properly when the gamma method was run.
439        """
440        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
441
442    def is_zero(self, atol=1e-10):
443        """Checks whether the observable is zero within a given tolerance.
444
445        Parameters
446        ----------
447        atol : float
448            Absolute tolerance (for details see numpy documentation).
449        """
450        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
451
452    def plot_tauint(self, save=None):
453        """Plot integrated autocorrelation time for each ensemble.
454
455        Parameters
456        ----------
457        save : str
458            saves the figure to a file named 'save' if.
459        """
460        if not hasattr(self, 'e_dvalue'):
461            raise Exception('Run the gamma method first.')
462
463        for e, e_name in enumerate(self.mc_names):
464            fig = plt.figure()
465            plt.xlabel(r'$W$')
466            plt.ylabel(r'$\tau_\mathrm{int}$')
467            length = int(len(self.e_n_tauint[e_name]))
468            if self.tau_exp[e_name] > 0:
469                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
470                x_help = np.arange(2 * self.tau_exp[e_name])
471                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
472                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
473                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
474                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
475                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
476                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
477                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
478            else:
479                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
480                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
481
482            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
483            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
484            plt.legend()
485            plt.xlim(-0.5, xmax)
486            ylim = plt.ylim()
487            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
488            plt.draw()
489            if save:
490                fig.savefig(save + "_" + str(e))
491
492    def plot_rho(self, save=None):
493        """Plot normalized autocorrelation function time for each ensemble.
494
495        Parameters
496        ----------
497        save : str
498            saves the figure to a file named 'save' if.
499        """
500        if not hasattr(self, 'e_dvalue'):
501            raise Exception('Run the gamma method first.')
502        for e, e_name in enumerate(self.mc_names):
503            fig = plt.figure()
504            plt.xlabel('W')
505            plt.ylabel('rho')
506            length = int(len(self.e_drho[e_name]))
507            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
508            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
509            if self.tau_exp[e_name] > 0:
510                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
511                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
512                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
513                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
514            else:
515                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
516                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
517            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
518            plt.xlim(-0.5, xmax)
519            plt.draw()
520            if save:
521                fig.savefig(save + "_" + str(e))
522
523    def plot_rep_dist(self):
524        """Plot replica distribution for each ensemble with more than one replicum."""
525        if not hasattr(self, 'e_dvalue'):
526            raise Exception('Run the gamma method first.')
527        for e, e_name in enumerate(self.mc_names):
528            if len(self.e_content[e_name]) == 1:
529                print('No replica distribution for a single replicum (', e_name, ')')
530                continue
531            r_length = []
532            sub_r_mean = 0
533            for r, r_name in enumerate(self.e_content[e_name]):
534                r_length.append(len(self.deltas[r_name]))
535                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
536            e_N = np.sum(r_length)
537            sub_r_mean /= e_N
538            arr = np.zeros(len(self.e_content[e_name]))
539            for r, r_name in enumerate(self.e_content[e_name]):
540                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
541            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
542            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
543            plt.draw()
544
545    def plot_history(self, expand=True):
546        """Plot derived Monte Carlo history for each ensemble
547
548        Parameters
549        ----------
550        expand : bool
551            show expanded history for irregular Monte Carlo chains (default: True).
552        """
553        for e, e_name in enumerate(self.mc_names):
554            plt.figure()
555            r_length = []
556            tmp = []
557            tmp_expanded = []
558            for r, r_name in enumerate(self.e_content[e_name]):
559                tmp.append(self.deltas[r_name] + self.r_values[r_name])
560                if expand:
561                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
562                    r_length.append(len(tmp_expanded[-1]))
563                else:
564                    r_length.append(len(tmp[-1]))
565            e_N = np.sum(r_length)
566            x = np.arange(e_N)
567            y_test = np.concatenate(tmp, axis=0)
568            if expand:
569                y = np.concatenate(tmp_expanded, axis=0)
570            else:
571                y = y_test
572            plt.errorbar(x, y, fmt='.', markersize=3)
573            plt.xlim(-0.5, e_N - 0.5)
574            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
575            plt.draw()
576
577    def plot_piechart(self, save=None):
578        """Plot piechart which shows the fractional contribution of each
579        ensemble to the error and returns a dictionary containing the fractions.
580
581        Parameters
582        ----------
583        save : str
584            saves the figure to a file named 'save' if.
585        """
586        if not hasattr(self, 'e_dvalue'):
587            raise Exception('Run the gamma method first.')
588        if np.isclose(0.0, self._dvalue, atol=1e-15):
589            raise Exception('Error is 0.0')
590        labels = self.e_names
591        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
592        fig1, ax1 = plt.subplots()
593        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
594        ax1.axis('equal')
595        plt.draw()
596        if save:
597            fig1.savefig(save)
598
599        return dict(zip(self.e_names, sizes))
600
601    def dump(self, filename, datatype="json.gz", description="", **kwargs):
602        """Dump the Obs to a file 'name' of chosen format.
603
604        Parameters
605        ----------
606        filename : str
607            name of the file to be saved.
608        datatype : str
609            Format of the exported file. Supported formats include
610            "json.gz" and "pickle"
611        description : str
612            Description for output file, only relevant for json.gz format.
613        path : str
614            specifies a custom path for the file (default '.')
615        """
616        if 'path' in kwargs:
617            file_name = kwargs.get('path') + '/' + filename
618        else:
619            file_name = filename
620
621        if datatype == "json.gz":
622            from .input.json import dump_to_json
623            dump_to_json([self], file_name, description=description)
624        elif datatype == "pickle":
625            with open(file_name + '.p', 'wb') as fb:
626                pickle.dump(self, fb)
627        else:
628            raise Exception("Unknown datatype " + str(datatype))
629
630    def export_jackknife(self):
631        """Export jackknife samples from the Obs
632
633        Returns
634        -------
635        numpy.ndarray
636            Returns a numpy array of length N + 1 where N is the number of samples
637            for the given ensemble and replicum. The zeroth entry of the array contains
638            the mean value of the Obs, entries 1 to N contain the N jackknife samples
639            derived from the Obs. The current implementation only works for observables
640            defined on exactly one ensemble and replicum. The derived jackknife samples
641            should agree with samples from a full jackknife analysis up to O(1/N).
642        """
643
644        if len(self.names) != 1:
645            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
646
647        name = self.names[0]
648        full_data = self.deltas[name] + self.r_values[name]
649        n = full_data.size
650        mean = self.value
651        tmp_jacks = np.zeros(n + 1)
652        tmp_jacks[0] = mean
653        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
654        return tmp_jacks
655
656    def __float__(self):
657        return float(self.value)
658
659    def __repr__(self):
660        return 'Obs[' + str(self) + ']'
661
662    def __str__(self):
663        if self._dvalue == 0.0:
664            return str(self.value)
665        fexp = np.floor(np.log10(self._dvalue))
666        if fexp < 0.0:
667            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
668        elif fexp == 0.0:
669            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
670        else:
671            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)
672
673    # Overload comparisons
674    def __lt__(self, other):
675        return self.value < other
676
677    def __le__(self, other):
678        return self.value <= other
679
680    def __gt__(self, other):
681        return self.value > other
682
683    def __ge__(self, other):
684        return self.value >= other
685
686    def __eq__(self, other):
687        return (self - other).is_zero()
688
689    def __ne__(self, other):
690        return not (self - other).is_zero()
691
692    # Overload math operations
693    def __add__(self, y):
694        if isinstance(y, Obs):
695            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
696        else:
697            if isinstance(y, np.ndarray):
698                return np.array([self + o for o in y])
699            elif y.__class__.__name__ in ['Corr', 'CObs']:
700                return NotImplemented
701            else:
702                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
703
704    def __radd__(self, y):
705        return self + y
706
707    def __mul__(self, y):
708        if isinstance(y, Obs):
709            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
710        else:
711            if isinstance(y, np.ndarray):
712                return np.array([self * o for o in y])
713            elif isinstance(y, complex):
714                return CObs(self * y.real, self * y.imag)
715            elif y.__class__.__name__ in ['Corr', 'CObs']:
716                return NotImplemented
717            else:
718                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
719
720    def __rmul__(self, y):
721        return self * y
722
723    def __sub__(self, y):
724        if isinstance(y, Obs):
725            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
726        else:
727            if isinstance(y, np.ndarray):
728                return np.array([self - o for o in y])
729            elif y.__class__.__name__ in ['Corr', 'CObs']:
730                return NotImplemented
731            else:
732                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
733
734    def __rsub__(self, y):
735        return -1 * (self - y)
736
737    def __pos__(self):
738        return self
739
740    def __neg__(self):
741        return -1 * self
742
743    def __truediv__(self, y):
744        if isinstance(y, Obs):
745            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
746        else:
747            if isinstance(y, np.ndarray):
748                return np.array([self / o for o in y])
749            elif y.__class__.__name__ in ['Corr', 'CObs']:
750                return NotImplemented
751            else:
752                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
753
754    def __rtruediv__(self, y):
755        if isinstance(y, Obs):
756            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
757        else:
758            if isinstance(y, np.ndarray):
759                return np.array([o / self for o in y])
760            elif y.__class__.__name__ in ['Corr', 'CObs']:
761                return NotImplemented
762            else:
763                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
764
765    def __pow__(self, y):
766        if isinstance(y, Obs):
767            return derived_observable(lambda x: x[0] ** x[1], [self, y])
768        else:
769            return derived_observable(lambda x: x[0] ** y, [self])
770
771    def __rpow__(self, y):
772        if isinstance(y, Obs):
773            return derived_observable(lambda x: x[0] ** x[1], [y, self])
774        else:
775            return derived_observable(lambda x: y ** x[0], [self])
776
777    def __abs__(self):
778        return derived_observable(lambda x: anp.abs(x[0]), [self])
779
780    # Overload numpy functions
781    def sqrt(self):
782        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
783
784    def log(self):
785        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
786
787    def exp(self):
788        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
789
790    def sin(self):
791        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
792
793    def cos(self):
794        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
795
796    def tan(self):
797        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
798
799    def arcsin(self):
800        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
801
802    def arccos(self):
803        return derived_observable(lambda x: anp.arccos(x[0]), [self])
804
805    def arctan(self):
806        return derived_observable(lambda x: anp.arctan(x[0]), [self])
807
808    def sinh(self):
809        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
810
811    def cosh(self):
812        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
813
814    def tanh(self):
815        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
816
817    def arcsinh(self):
818        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
819
820    def arccosh(self):
821        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
822
823    def arctanh(self):
824        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 59    def __init__(self, samples, names, idl=None, **kwargs):
 60        """ Initialize Obs object.
 61
 62        Parameters
 63        ----------
 64        samples : list
 65            list of numpy arrays containing the Monte Carlo samples
 66        names : list
 67            list of strings labeling the individual samples
 68        idl : list, optional
 69            list of ranges or lists on which the samples are defined
 70        """
 71
 72        if kwargs.get("means") is None and len(samples):
 73            if len(samples) != len(names):
 74                raise Exception('Length of samples and names incompatible.')
 75            if idl is not None:
 76                if len(idl) != len(names):
 77                    raise Exception('Length of idl incompatible with samples and names.')
 78            name_length = len(names)
 79            if name_length > 1:
 80                if name_length != len(set(names)):
 81                    raise Exception('names are not unique.')
 82                if not all(isinstance(x, str) for x in names):
 83                    raise TypeError('All names have to be strings.')
 84            else:
 85                if not isinstance(names[0], str):
 86                    raise TypeError('All names have to be strings.')
 87            if min(len(x) for x in samples) <= 4:
 88                raise Exception('Samples have to have at least 5 entries.')
 89
 90        self.names = sorted(names)
 91        self.shape = {}
 92        self.r_values = {}
 93        self.deltas = {}
 94        self._covobs = {}
 95
 96        self._value = 0
 97        self.N = 0
 98        self.is_merged = {}
 99        self.idl = {}
100        if idl is not None:
101            for name, idx in sorted(zip(names, idl)):
102                if isinstance(idx, range):
103                    self.idl[name] = idx
104                elif isinstance(idx, (list, np.ndarray)):
105                    dc = np.unique(np.diff(idx))
106                    if np.any(dc < 0):
107                        raise Exception("Unsorted idx for idl[%s]" % (name))
108                    if len(dc) == 1:
109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
110                    else:
111                        self.idl[name] = list(idx)
112                else:
113                    raise Exception('incompatible type for idl[%s].' % (name))
114        else:
115            for name, sample in sorted(zip(names, samples)):
116                self.idl[name] = range(1, len(sample) + 1)
117
118        if kwargs.get("means") is not None:
119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
120                self.shape[name] = len(self.idl[name])
121                self.N += self.shape[name]
122                self.r_values[name] = mean
123                self.deltas[name] = sample
124        else:
125            for name, sample in sorted(zip(names, samples)):
126                self.shape[name] = len(self.idl[name])
127                self.N += self.shape[name]
128                if len(sample) != self.shape[name]:
129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
130                self.r_values[name] = np.mean(sample)
131                self.deltas[name] = sample - self.r_values[name]
132                self._value += self.shape[name] * self.r_values[name]
133            self._value /= self.N
134
135        self._dvalue = 0.0
136        self.ddvalue = 0.0
137        self.reweighted = False
138
139        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
S_global = 2.0
S_dict = {}
tau_exp_global = 0.0
tau_exp_dict = {}
N_sigma_global = 1.0
N_sigma_dict = {}
filter_eps = 1e-10
names
shape
r_values
deltas
N
is_merged
idl
ddvalue
reweighted
tag
value
dvalue
e_names
cov_names
mc_names
e_content
covobs
def gamma_method(self, **kwargs)
174    def gamma_method(self, **kwargs):
175        """Estimate the error and related properties of the Obs.
176
177        Parameters
178        ----------
179        S : float
180            specifies a custom value for the parameter S (default 2.0).
181            If set to 0 it is assumed that the data exhibits no
182            autocorrelation. In this case the error estimates coincides
183            with the sample standard error.
184        tau_exp : float
185            positive value triggers the critical slowing down analysis
186            (default 0.0).
187        N_sigma : float
188            number of standard deviations from zero until the tail is
189            attached to the autocorrelation function (default 1).
190        fft : bool
191            determines whether the fft algorithm is used for the computation
192            of the autocorrelation function (default True)
193        """
194
195        e_content = self.e_content
196        self.e_dvalue = {}
197        self.e_ddvalue = {}
198        self.e_tauint = {}
199        self.e_dtauint = {}
200        self.e_windowsize = {}
201        self.e_n_tauint = {}
202        self.e_n_dtauint = {}
203        e_gamma = {}
204        self.e_rho = {}
205        self.e_drho = {}
206        self._dvalue = 0
207        self.ddvalue = 0
208
209        self.S = {}
210        self.tau_exp = {}
211        self.N_sigma = {}
212
213        if kwargs.get('fft') is False:
214            fft = False
215        else:
216            fft = True
217
218        def _parse_kwarg(kwarg_name):
219            if kwarg_name in kwargs:
220                tmp = kwargs.get(kwarg_name)
221                if isinstance(tmp, (int, float)):
222                    if tmp < 0:
223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
224                    for e, e_name in enumerate(self.e_names):
225                        getattr(self, kwarg_name)[e_name] = tmp
226                else:
227                    raise TypeError(kwarg_name + ' is not in proper format.')
228            else:
229                for e, e_name in enumerate(self.e_names):
230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
232                    else:
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
234
235        _parse_kwarg('S')
236        _parse_kwarg('tau_exp')
237        _parse_kwarg('N_sigma')
238
239        for e, e_name in enumerate(self.mc_names):
240            r_length = []
241            for r_name in e_content[e_name]:
242                if isinstance(self.idl[r_name], range):
243                    r_length.append(len(self.idl[r_name]))
244                else:
245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
246
247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
248            w_max = max(r_length) // 2
249            e_gamma[e_name] = np.zeros(w_max)
250            self.e_rho[e_name] = np.zeros(w_max)
251            self.e_drho[e_name] = np.zeros(w_max)
252
253            for r_name in e_content[e_name]:
254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
255
256            gamma_div = np.zeros(w_max)
257            for r_name in e_content[e_name]:
258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
259            gamma_div[gamma_div < 1] = 1.0
260            e_gamma[e_name] /= gamma_div[:w_max]
261
262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
263                self.e_tauint[e_name] = 0.5
264                self.e_dtauint[e_name] = 0.0
265                self.e_dvalue[e_name] = 0.0
266                self.e_ddvalue[e_name] = 0.0
267                self.e_windowsize[e_name] = 0
268                continue
269
270            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
271            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
272            # Make sure no entry of tauint is smaller than 0.5
273            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
274            # hep-lat/0306017 eq. (42)
275            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
276            self.e_n_dtauint[e_name][0] = 0.0
277
278            def _compute_drho(i):
279                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
280                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
281
282            _compute_drho(1)
283            if self.tau_exp[e_name] > 0:
284                texp = self.tau_exp[e_name]
285                # Critical slowing down analysis
286                if w_max // 2 <= 1:
287                    raise Exception("Need at least 8 samples for tau_exp error analysis")
288                for n in range(1, w_max // 2):
289                    _compute_drho(n + 1)
290                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
291                        # Bias correction hep-lat/0306017 eq. (49) included
292                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
293                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
294                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
295                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
296                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
297                        self.e_windowsize[e_name] = n
298                        break
299            else:
300                if self.S[e_name] == 0.0:
301                    self.e_tauint[e_name] = 0.5
302                    self.e_dtauint[e_name] = 0.0
303                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
304                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
305                    self.e_windowsize[e_name] = 0
306                else:
307                    # Standard automatic windowing procedure
308                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
309                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
310                    for n in range(1, w_max):
311                        if n < w_max // 2 - 2:
312                            _compute_drho(n + 1)
313                        if g_w[n - 1] < 0 or n >= w_max - 1:
314                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
315                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
316                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
317                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
318                            self.e_windowsize[e_name] = n
319                            break
320
321            self._dvalue += self.e_dvalue[e_name] ** 2
322            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
323
324        for e_name in self.cov_names:
325            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
326            self.e_ddvalue[e_name] = 0
327            self._dvalue += self.e_dvalue[e_name]**2
328
329        self._dvalue = np.sqrt(self._dvalue)
330        if self._dvalue == 0.0:
331            self.ddvalue = 0.0
332        else:
333            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
334        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True)
369    def details(self, ens_content=True):
370        """Output detailed properties of the Obs.
371
372        Parameters
373        ----------
374        ens_content : bool
375            print details about the ensembles and replica if true.
376        """
377        if self.tag is not None:
378            print("Description:", self.tag)
379        if not hasattr(self, 'e_dvalue'):
380            print('Result\t %3.8e' % (self.value))
381        else:
382            if self.value == 0.0:
383                percentage = np.nan
384            else:
385                percentage = np.abs(self._dvalue / self.value) * 100
386            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
387            if len(self.e_names) > 1:
388                print(' Ensemble errors:')
389            for e_name in self.mc_names:
390                if len(self.e_names) > 1:
391                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
392                if self.tau_exp[e_name] > 0:
393                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f,  N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
394                else:
395                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
396            for e_name in self.cov_names:
397                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
398        if ens_content is True:
399            if len(self.e_names) == 1:
400                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
401            else:
402                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
403            my_string_list = []
404            for key, value in sorted(self.e_content.items()):
405                if key not in self.covobs:
406                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
407                    if len(value) == 1:
408                        my_string += f': {self.shape[value[0]]} configurations'
409                        if isinstance(self.idl[value[0]], range):
410                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
411                        else:
412                            my_string += ' (irregular range)'
413                    else:
414                        sublist = []
415                        for v in value:
416                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
417                            my_substring += f': {self.shape[v]} configurations'
418                            if isinstance(self.idl[v], range):
419                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
420                            else:
421                                my_substring += ' (irregular range)'
422                            sublist.append(my_substring)
423
424                        my_string += '\n' + '\n'.join(sublist)
425                else:
426                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
427                my_string_list.append(my_string)
428            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def is_zero_within_error(self, sigma=1)
430    def is_zero_within_error(self, sigma=1):
431        """Checks whether the observable is zero within 'sigma' standard errors.
432
433        Parameters
434        ----------
435        sigma : int
436            Number of standard errors used for the check.
437
438        Works only properly when the gamma method was run.
439        """
440        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10)
442    def is_zero(self, atol=1e-10):
443        """Checks whether the observable is zero within a given tolerance.
444
445        Parameters
446        ----------
447        atol : float
448            Absolute tolerance (for details see numpy documentation).
449        """
450        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None)
452    def plot_tauint(self, save=None):
453        """Plot integrated autocorrelation time for each ensemble.
454
455        Parameters
456        ----------
457        save : str
458            saves the figure to a file named 'save' if.
459        """
460        if not hasattr(self, 'e_dvalue'):
461            raise Exception('Run the gamma method first.')
462
463        for e, e_name in enumerate(self.mc_names):
464            fig = plt.figure()
465            plt.xlabel(r'$W$')
466            plt.ylabel(r'$\tau_\mathrm{int}$')
467            length = int(len(self.e_n_tauint[e_name]))
468            if self.tau_exp[e_name] > 0:
469                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
470                x_help = np.arange(2 * self.tau_exp[e_name])
471                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
472                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
473                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
474                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
475                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
476                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
477                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
478            else:
479                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
480                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
481
482            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
483            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
484            plt.legend()
485            plt.xlim(-0.5, xmax)
486            ylim = plt.ylim()
487            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
488            plt.draw()
489            if save:
490                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None)
492    def plot_rho(self, save=None):
493        """Plot normalized autocorrelation function time for each ensemble.
494
495        Parameters
496        ----------
497        save : str
498            saves the figure to a file named 'save' if.
499        """
500        if not hasattr(self, 'e_dvalue'):
501            raise Exception('Run the gamma method first.')
502        for e, e_name in enumerate(self.mc_names):
503            fig = plt.figure()
504            plt.xlabel('W')
505            plt.ylabel('rho')
506            length = int(len(self.e_drho[e_name]))
507            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
508            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
509            if self.tau_exp[e_name] > 0:
510                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
511                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
512                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
513                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
514            else:
515                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
516                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
517            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
518            plt.xlim(-0.5, xmax)
519            plt.draw()
520            if save:
521                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self)
523    def plot_rep_dist(self):
524        """Plot replica distribution for each ensemble with more than one replicum."""
525        if not hasattr(self, 'e_dvalue'):
526            raise Exception('Run the gamma method first.')
527        for e, e_name in enumerate(self.mc_names):
528            if len(self.e_content[e_name]) == 1:
529                print('No replica distribution for a single replicum (', e_name, ')')
530                continue
531            r_length = []
532            sub_r_mean = 0
533            for r, r_name in enumerate(self.e_content[e_name]):
534                r_length.append(len(self.deltas[r_name]))
535                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
536            e_N = np.sum(r_length)
537            sub_r_mean /= e_N
538            arr = np.zeros(len(self.e_content[e_name]))
539            for r, r_name in enumerate(self.e_content[e_name]):
540                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
541            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
542            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
543            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True)
545    def plot_history(self, expand=True):
546        """Plot derived Monte Carlo history for each ensemble
547
548        Parameters
549        ----------
550        expand : bool
551            show expanded history for irregular Monte Carlo chains (default: True).
552        """
553        for e, e_name in enumerate(self.mc_names):
554            plt.figure()
555            r_length = []
556            tmp = []
557            tmp_expanded = []
558            for r, r_name in enumerate(self.e_content[e_name]):
559                tmp.append(self.deltas[r_name] + self.r_values[r_name])
560                if expand:
561                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
562                    r_length.append(len(tmp_expanded[-1]))
563                else:
564                    r_length.append(len(tmp[-1]))
565            e_N = np.sum(r_length)
566            x = np.arange(e_N)
567            y_test = np.concatenate(tmp, axis=0)
568            if expand:
569                y = np.concatenate(tmp_expanded, axis=0)
570            else:
571                y = y_test
572            plt.errorbar(x, y, fmt='.', markersize=3)
573            plt.xlim(-0.5, e_N - 0.5)
574            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
575            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None)
577    def plot_piechart(self, save=None):
578        """Plot piechart which shows the fractional contribution of each
579        ensemble to the error and returns a dictionary containing the fractions.
580
581        Parameters
582        ----------
583        save : str
584            saves the figure to a file named 'save' if.
585        """
586        if not hasattr(self, 'e_dvalue'):
587            raise Exception('Run the gamma method first.')
588        if np.isclose(0.0, self._dvalue, atol=1e-15):
589            raise Exception('Error is 0.0')
590        labels = self.e_names
591        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
592        fig1, ax1 = plt.subplots()
593        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
594        ax1.axis('equal')
595        plt.draw()
596        if save:
597            fig1.savefig(save)
598
599        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs)
601    def dump(self, filename, datatype="json.gz", description="", **kwargs):
602        """Dump the Obs to a file 'name' of chosen format.
603
604        Parameters
605        ----------
606        filename : str
607            name of the file to be saved.
608        datatype : str
609            Format of the exported file. Supported formats include
610            "json.gz" and "pickle"
611        description : str
612            Description for output file, only relevant for json.gz format.
613        path : str
614            specifies a custom path for the file (default '.')
615        """
616        if 'path' in kwargs:
617            file_name = kwargs.get('path') + '/' + filename
618        else:
619            file_name = filename
620
621        if datatype == "json.gz":
622            from .input.json import dump_to_json
623            dump_to_json([self], file_name, description=description)
624        elif datatype == "pickle":
625            with open(file_name + '.p', 'wb') as fb:
626                pickle.dump(self, fb)
627        else:
628            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self)
630    def export_jackknife(self):
631        """Export jackknife samples from the Obs
632
633        Returns
634        -------
635        numpy.ndarray
636            Returns a numpy array of length N + 1 where N is the number of samples
637            for the given ensemble and replicum. The zeroth entry of the array contains
638            the mean value of the Obs, entries 1 to N contain the N jackknife samples
639            derived from the Obs. The current implementation only works for observables
640            defined on exactly one ensemble and replicum. The derived jackknife samples
641            should agree with samples from a full jackknife analysis up to O(1/N).
642        """
643
644        if len(self.names) != 1:
645            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
646
647        name = self.names[0]
648        full_data = self.deltas[name] + self.r_values[name]
649        n = full_data.size
650        mean = self.value
651        tmp_jacks = np.zeros(n + 1)
652        tmp_jacks[0] = mean
653        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
654        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self)
781    def sqrt(self):
782        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self)
784    def log(self):
785        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self)
787    def exp(self):
788        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self)
790    def sin(self):
791        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self)
793    def cos(self):
794        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self)
796    def tan(self):
797        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self)
799    def arcsin(self):
800        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self)
802    def arccos(self):
803        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self)
805    def arctan(self):
806        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self)
808    def sinh(self):
809        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self)
811    def cosh(self):
812        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self)
814    def tanh(self):
815        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self)
817    def arcsinh(self):
818        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self)
820    def arccosh(self):
821        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self)
823    def arctanh(self):
824        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
N_sigma
S
e_ddvalue
e_drho
e_dtauint
e_dvalue
e_n_dtauint
e_n_tauint
e_rho
e_tauint
e_windowsize
tau_exp
class CObs:
827class CObs:
828    """Class for a complex valued observable."""
829    __slots__ = ['_real', '_imag', 'tag']
830
831    def __init__(self, real, imag=0.0):
832        self._real = real
833        self._imag = imag
834        self.tag = None
835
836    @property
837    def real(self):
838        return self._real
839
840    @property
841    def imag(self):
842        return self._imag
843
844    def gamma_method(self, **kwargs):
845        """Executes the gamma_method for the real and the imaginary part."""
846        if isinstance(self.real, Obs):
847            self.real.gamma_method(**kwargs)
848        if isinstance(self.imag, Obs):
849            self.imag.gamma_method(**kwargs)
850
851    def is_zero(self):
852        """Checks whether both real and imaginary part are zero within machine precision."""
853        return self.real == 0.0 and self.imag == 0.0
854
855    def conjugate(self):
856        return CObs(self.real, -self.imag)
857
858    def __add__(self, other):
859        if isinstance(other, np.ndarray):
860            return other + self
861        elif hasattr(other, 'real') and hasattr(other, 'imag'):
862            return CObs(self.real + other.real,
863                        self.imag + other.imag)
864        else:
865            return CObs(self.real + other, self.imag)
866
867    def __radd__(self, y):
868        return self + y
869
870    def __sub__(self, other):
871        if isinstance(other, np.ndarray):
872            return -1 * (other - self)
873        elif hasattr(other, 'real') and hasattr(other, 'imag'):
874            return CObs(self.real - other.real, self.imag - other.imag)
875        else:
876            return CObs(self.real - other, self.imag)
877
878    def __rsub__(self, other):
879        return -1 * (self - other)
880
881    def __mul__(self, other):
882        if isinstance(other, np.ndarray):
883            return other * self
884        elif hasattr(other, 'real') and hasattr(other, 'imag'):
885            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
886                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
887                                               [self.real, other.real, self.imag, other.imag],
888                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
889                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
890                                               [self.real, other.real, self.imag, other.imag],
891                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
892            elif getattr(other, 'imag', 0) != 0:
893                return CObs(self.real * other.real - self.imag * other.imag,
894                            self.imag * other.real + self.real * other.imag)
895            else:
896                return CObs(self.real * other.real, self.imag * other.real)
897        else:
898            return CObs(self.real * other, self.imag * other)
899
900    def __rmul__(self, other):
901        return self * other
902
903    def __truediv__(self, other):
904        if isinstance(other, np.ndarray):
905            return 1 / (other / self)
906        elif hasattr(other, 'real') and hasattr(other, 'imag'):
907            r = other.real ** 2 + other.imag ** 2
908            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
909        else:
910            return CObs(self.real / other, self.imag / other)
911
912    def __rtruediv__(self, other):
913        r = self.real ** 2 + self.imag ** 2
914        if hasattr(other, 'real') and hasattr(other, 'imag'):
915            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
916        else:
917            return CObs(self.real * other / r, -self.imag * other / r)
918
919    def __abs__(self):
920        return np.sqrt(self.real**2 + self.imag**2)
921
922    def __pos__(self):
923        return self
924
925    def __neg__(self):
926        return -1 * self
927
928    def __eq__(self, other):
929        return self.real == other.real and self.imag == other.imag
930
931    def __str__(self):
932        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
933
934    def __repr__(self):
935        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
831    def __init__(self, real, imag=0.0):
832        self._real = real
833        self._imag = imag
834        self.tag = None
tag
real
imag
def gamma_method(self, **kwargs)
844    def gamma_method(self, **kwargs):
845        """Executes the gamma_method for the real and the imaginary part."""
846        if isinstance(self.real, Obs):
847            self.real.gamma_method(**kwargs)
848        if isinstance(self.imag, Obs):
849            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self)
851    def is_zero(self):
852        """Checks whether both real and imaginary part are zero within machine precision."""
853        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self)
855    def conjugate(self):
856        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs)
1104def derived_observable(func, data, array_mode=False, **kwargs):
1105    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1106
1107    Parameters
1108    ----------
1109    func : object
1110        arbitrary function of the form func(data, **kwargs). For the
1111        automatic differentiation to work, all numpy functions have to have
1112        the autograd wrapper (use 'import autograd.numpy as anp').
1113    data : list
1114        list of Obs, e.g. [obs1, obs2, obs3].
1115    num_grad : bool
1116        if True, numerical derivatives are used instead of autograd
1117        (default False). To control the numerical differentiation the
1118        kwargs of numdifftools.step_generators.MaxStepGenerator
1119        can be used.
1120    man_grad : list
1121        manually supply a list or an array which contains the jacobian
1122        of func. Use cautiously, supplying the wrong derivative will
1123        not be intercepted.
1124
1125    Notes
1126    -----
1127    For simple mathematical operations it can be practical to use anonymous
1128    functions. For the ratio of two observables one can e.g. use
1129
1130    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1131    """
1132
1133    data = np.asarray(data)
1134    raveled_data = data.ravel()
1135
1136    # Workaround for matrix operations containing non Obs data
1137    if not all(isinstance(x, Obs) for x in raveled_data):
1138        for i in range(len(raveled_data)):
1139            if isinstance(raveled_data[i], (int, float)):
1140                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1141
1142    allcov = {}
1143    for o in raveled_data:
1144        for name in o.cov_names:
1145            if name in allcov:
1146                if not np.allclose(allcov[name], o.covobs[name].cov):
1147                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1148            else:
1149                allcov[name] = o.covobs[name].cov
1150
1151    n_obs = len(raveled_data)
1152    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1153    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1154    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1155
1156    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1157    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1158
1159    if data.ndim == 1:
1160        values = np.array([o.value for o in data])
1161    else:
1162        values = np.vectorize(lambda x: x.value)(data)
1163
1164    new_values = func(values, **kwargs)
1165
1166    multi = int(isinstance(new_values, np.ndarray))
1167
1168    new_r_values = {}
1169    new_idl_d = {}
1170    for name in new_sample_names:
1171        idl = []
1172        tmp_values = np.zeros(n_obs)
1173        for i, item in enumerate(raveled_data):
1174            tmp_values[i] = item.r_values.get(name, item.value)
1175            tmp_idl = item.idl.get(name)
1176            if tmp_idl is not None:
1177                idl.append(tmp_idl)
1178        if multi > 0:
1179            tmp_values = np.array(tmp_values).reshape(data.shape)
1180        new_r_values[name] = func(tmp_values, **kwargs)
1181        new_idl_d[name] = _merge_idx(idl)
1182        if not is_merged[name]:
1183            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1184
1185    if 'man_grad' in kwargs:
1186        deriv = np.asarray(kwargs.get('man_grad'))
1187        if new_values.shape + data.shape != deriv.shape:
1188            raise Exception('Manual derivative does not have correct shape.')
1189    elif kwargs.get('num_grad') is True:
1190        if multi > 0:
1191            raise Exception('Multi mode currently not supported for numerical derivative')
1192        options = {
1193            'base_step': 0.1,
1194            'step_ratio': 2.5}
1195        for key in options.keys():
1196            kwarg = kwargs.get(key)
1197            if kwarg is not None:
1198                options[key] = kwarg
1199        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1200        if tmp_df.size == 1:
1201            deriv = np.array([tmp_df.real])
1202        else:
1203            deriv = tmp_df.real
1204    else:
1205        deriv = jacobian(func)(values, **kwargs)
1206
1207    final_result = np.zeros(new_values.shape, dtype=object)
1208
1209    if array_mode is True:
1210
1211        class _Zero_grad():
1212            def __init__(self, N):
1213                self.grad = np.zeros((N, 1))
1214
1215        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1216        d_extracted = {}
1217        g_extracted = {}
1218        for name in new_sample_names:
1219            d_extracted[name] = []
1220            ens_length = len(new_idl_d[name])
1221            for i_dat, dat in enumerate(data):
1222                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1223        for name in new_cov_names:
1224            g_extracted[name] = []
1225            zero_grad = _Zero_grad(new_covobs_lengths[name])
1226            for i_dat, dat in enumerate(data):
1227                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1228
1229    for i_val, new_val in np.ndenumerate(new_values):
1230        new_deltas = {}
1231        new_grad = {}
1232        if array_mode is True:
1233            for name in new_sample_names:
1234                ens_length = d_extracted[name][0].shape[-1]
1235                new_deltas[name] = np.zeros(ens_length)
1236                for i_dat, dat in enumerate(d_extracted[name]):
1237                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1238            for name in new_cov_names:
1239                new_grad[name] = 0
1240                for i_dat, dat in enumerate(g_extracted[name]):
1241                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1242        else:
1243            for j_obs, obs in np.ndenumerate(data):
1244                for name in obs.names:
1245                    if name in obs.cov_names:
1246                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1247                    else:
1248                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1249
1250        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1251
1252        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1253            raise Exception('The same name has been used for deltas and covobs!')
1254        new_samples = []
1255        new_means = []
1256        new_idl = []
1257        new_names_obs = []
1258        for name in new_names:
1259            if name not in new_covobs:
1260                if is_merged[name]:
1261                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1262                else:
1263                    filtered_deltas = new_deltas[name]
1264                    filtered_idl_d = new_idl_d[name]
1265
1266                new_samples.append(filtered_deltas)
1267                new_idl.append(filtered_idl_d)
1268                new_means.append(new_r_values[name][i_val])
1269                new_names_obs.append(name)
1270        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1271        for name in new_covobs:
1272            final_result[i_val].names.append(name)
1273        final_result[i_val]._covobs = new_covobs
1274        final_result[i_val]._value = new_val
1275        final_result[i_val].is_merged = is_merged
1276        final_result[i_val].reweighted = reweighted
1277
1278    if multi == 0:
1279        final_result = final_result.item()
1280
1281    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs)
1321def reweight(weight, obs, **kwargs):
1322    """Reweight a list of observables.
1323
1324    Parameters
1325    ----------
1326    weight : Obs
1327        Reweighting factor. An Observable that has to be defined on a superset of the
1328        configurations in obs[i].idl for all i.
1329    obs : list
1330        list of Obs, e.g. [obs1, obs2, obs3].
1331    all_configs : bool
1332        if True, the reweighted observables are normalized by the average of
1333        the reweighting factor on all configurations in weight.idl and not
1334        on the configurations in obs[i].idl.
1335    """
1336    result = []
1337    for i in range(len(obs)):
1338        if len(obs[i].cov_names):
1339            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1340        if not set(obs[i].names).issubset(weight.names):
1341            raise Exception('Error: Ensembles do not fit')
1342        for name in obs[i].names:
1343            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1344                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1345        new_samples = []
1346        w_deltas = {}
1347        for name in sorted(obs[i].names):
1348            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1349            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1350        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1351
1352        if kwargs.get('all_configs'):
1353            new_weight = weight
1354        else:
1355            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1356
1357        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
1358        result[-1].reweighted = True
1359        result[-1].is_merged = obs[i].is_merged
1360
1361    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
def correlate(obs_a, obs_b)
1364def correlate(obs_a, obs_b):
1365    """Correlate two observables.
1366
1367    Parameters
1368    ----------
1369    obs_a : Obs
1370        First observable
1371    obs_b : Obs
1372        Second observable
1373
1374    Notes
1375    -----
1376    Keep in mind to only correlate primary observables which have not been reweighted
1377    yet. The reweighting has to be applied after correlating the observables.
1378    Currently only works if ensembles are identical (this is not strictly necessary).
1379    """
1380
1381    if sorted(obs_a.names) != sorted(obs_b.names):
1382        raise Exception('Ensembles do not fit')
1383    if len(obs_a.cov_names) or len(obs_b.cov_names):
1384        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1385    for name in obs_a.names:
1386        if obs_a.shape[name] != obs_b.shape[name]:
1387            raise Exception('Shapes of ensemble', name, 'do not fit')
1388        if obs_a.idl[name] != obs_b.idl[name]:
1389            raise Exception('idl of ensemble', name, 'do not fit')
1390
1391    if obs_a.reweighted is True:
1392        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1393    if obs_b.reweighted is True:
1394        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1395
1396    new_samples = []
1397    new_idl = []
1398    for name in sorted(obs_a.names):
1399        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1400        new_idl.append(obs_a.idl[name])
1401
1402    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1403    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1404    o.reweighted = obs_a.reweighted or obs_b.reweighted
1405    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs)
1408def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1409    r'''Calculates the error covariance matrix of a set of observables.
1410
1411    The gamma method has to be applied first to all observables.
1412
1413    Parameters
1414    ----------
1415    obs : list or numpy.ndarray
1416        List or one dimensional array of Obs
1417    visualize : bool
1418        If True plots the corresponding normalized correlation matrix (default False).
1419    correlation : bool
1420        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1421    smooth : None or int
1422        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1423        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1424        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1425        small ones.
1426
1427    Notes
1428    -----
1429    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1430    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1431    in the absence of autocorrelation.
1432    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1433    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1434    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1435    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1436    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1437    '''
1438
1439    length = len(obs)
1440
1441    max_samples = np.max([o.N for o in obs])
1442    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1443        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1444
1445    cov = np.zeros((length, length))
1446    for i in range(length):
1447        for j in range(i, length):
1448            cov[i, j] = _covariance_element(obs[i], obs[j])
1449    cov = cov + cov.T - np.diag(np.diag(cov))
1450
1451    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1452
1453    if isinstance(smooth, int):
1454        corr = _smooth_eigenvalues(corr, smooth)
1455
1456    if visualize:
1457        plt.matshow(corr, vmin=-1, vmax=1)
1458        plt.set_cmap('RdBu')
1459        plt.colorbar()
1460        plt.draw()
1461
1462    if correlation is True:
1463        return corr
1464
1465    errors = [o.dvalue for o in obs]
1466    cov = np.diag(errors) @ corr @ np.diag(errors)
1467
1468    eigenvalues = np.linalg.eigh(cov)[0]
1469    if not np.all(eigenvalues >= 0):
1470        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1471
1472    return cov

Calculates the error covariance matrix of a set of observables.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None)
1552def import_jackknife(jacks, name, idl=None):
1553    """Imports jackknife samples and returns an Obs
1554
1555    Parameters
1556    ----------
1557    jacks : numpy.ndarray
1558        numpy array containing the mean value as zeroth entry and
1559        the N jackknife samples as first to Nth entry.
1560    name : str
1561        name of the ensemble the samples are defined on.
1562    """
1563    length = len(jacks) - 1
1564    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1565    samples = jacks[1:] @ prj
1566    mean = np.mean(samples)
1567    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1568    new_obs._value = jacks[0]
1569    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs)
1572def merge_obs(list_of_obs):
1573    """Combine all observables in list_of_obs into one new observable
1574
1575    Parameters
1576    ----------
1577    list_of_obs : list
1578        list of the Obs object to be combined
1579
1580    Notes
1581    -----
1582    It is not possible to combine obs which are based on the same replicum
1583    """
1584    replist = [item for obs in list_of_obs for item in obs.names]
1585    if (len(replist) == len(set(replist))) is False:
1586        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1587    if any([len(o.cov_names) for o in list_of_obs]):
1588        raise Exception('Not possible to merge data that contains covobs!')
1589    new_dict = {}
1590    idl_dict = {}
1591    for o in list_of_obs:
1592        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1593                        for key in set(o.deltas) | set(o.r_values)})
1594        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1595
1596    names = sorted(new_dict.keys())
1597    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1598    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1599    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1600    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None)
1603def cov_Obs(means, cov, name, grad=None):
1604    """Create an Obs based on mean(s) and a covariance matrix
1605
1606    Parameters
1607    ----------
1608    mean : list of floats or float
1609        N mean value(s) of the new Obs
1610    cov : list or array
1611        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1612    name : str
1613        identifier for the covariance matrix
1614    grad : list or array
1615        Gradient of the Covobs wrt. the means belonging to cov.
1616    """
1617
1618    def covobs_to_obs(co):
1619        """Make an Obs out of a Covobs
1620
1621        Parameters
1622        ----------
1623        co : Covobs
1624            Covobs to be embedded into the Obs
1625        """
1626        o = Obs([], [], means=[])
1627        o._value = co.value
1628        o.names.append(co.name)
1629        o._covobs[co.name] = co
1630        o._dvalue = np.sqrt(co.errsq())
1631        return o
1632
1633    ol = []
1634    if isinstance(means, (float, int)):
1635        means = [means]
1636
1637    for i in range(len(means)):
1638        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1639    if ol[0].covobs[name].N != len(means):
1640        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1641    if len(ol) == 1:
1642        return ol[0]
1643    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.