pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15
  16class Obs:
  17    """Class for a general observable.
  18
  19    Instances of Obs are the basic objects of a pyerrors error analysis.
  20    They are initialized with a list which contains arrays of samples for
  21    different ensembles/replica and another list of same length which contains
  22    the names of the ensembles/replica. Mathematical operations can be
  23    performed on instances. The result is another instance of Obs. The error of
  24    an instance can be computed with the gamma_method. Also contains additional
  25    methods for output and visualization of the error calculation.
  26
  27    Attributes
  28    ----------
  29    S_global : float
  30        Standard value for S (default 2.0)
  31    S_dict : dict
  32        Dictionary for S values. If an entry for a given ensemble
  33        exists this overwrites the standard value for that ensemble.
  34    tau_exp_global : float
  35        Standard value for tau_exp (default 0.0)
  36    tau_exp_dict : dict
  37        Dictionary for tau_exp values. If an entry for a given ensemble exists
  38        this overwrites the standard value for that ensemble.
  39    N_sigma_global : float
  40        Standard value for N_sigma (default 1.0)
  41    N_sigma_dict : dict
  42        Dictionary for N_sigma values. If an entry for a given ensemble exists
  43        this overwrites the standard value for that ensemble.
  44    """
  45    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  46                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  47                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  48                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  49                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  50
  51    S_global = 2.0
  52    S_dict = {}
  53    tau_exp_global = 0.0
  54    tau_exp_dict = {}
  55    N_sigma_global = 1.0
  56    N_sigma_dict = {}
  57    filter_eps = 1e-10
  58
  59    def __init__(self, samples, names, idl=None, **kwargs):
  60        """ Initialize Obs object.
  61
  62        Parameters
  63        ----------
  64        samples : list
  65            list of numpy arrays containing the Monte Carlo samples
  66        names : list
  67            list of strings labeling the individual samples
  68        idl : list, optional
  69            list of ranges or lists on which the samples are defined
  70        """
  71
  72        if kwargs.get("means") is None and len(samples):
  73            if len(samples) != len(names):
  74                raise Exception('Length of samples and names incompatible.')
  75            if idl is not None:
  76                if len(idl) != len(names):
  77                    raise Exception('Length of idl incompatible with samples and names.')
  78            name_length = len(names)
  79            if name_length > 1:
  80                if name_length != len(set(names)):
  81                    raise Exception('names are not unique.')
  82                if not all(isinstance(x, str) for x in names):
  83                    raise TypeError('All names have to be strings.')
  84            else:
  85                if not isinstance(names[0], str):
  86                    raise TypeError('All names have to be strings.')
  87            if min(len(x) for x in samples) <= 4:
  88                raise Exception('Samples have to have at least 5 entries.')
  89
  90        self.names = sorted(names)
  91        self.shape = {}
  92        self.r_values = {}
  93        self.deltas = {}
  94        self._covobs = {}
  95
  96        self._value = 0
  97        self.N = 0
  98        self.is_merged = {}
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise Exception("Unsorted idx for idl[%s]" % (name))
 108                    if len(dc) == 1:
 109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 110                    else:
 111                        self.idl[name] = list(idx)
 112                else:
 113                    raise Exception('incompatible type for idl[%s].' % (name))
 114        else:
 115            for name, sample in sorted(zip(names, samples)):
 116                self.idl[name] = range(1, len(sample) + 1)
 117
 118        if kwargs.get("means") is not None:
 119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 120                self.shape[name] = len(self.idl[name])
 121                self.N += self.shape[name]
 122                self.r_values[name] = mean
 123                self.deltas[name] = sample
 124        else:
 125            for name, sample in sorted(zip(names, samples)):
 126                self.shape[name] = len(self.idl[name])
 127                self.N += self.shape[name]
 128                if len(sample) != self.shape[name]:
 129                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 130                self.r_values[name] = np.mean(sample)
 131                self.deltas[name] = sample - self.r_values[name]
 132                self._value += self.shape[name] * self.r_values[name]
 133            self._value /= self.N
 134
 135        self._dvalue = 0.0
 136        self.ddvalue = 0.0
 137        self.reweighted = False
 138
 139        self.tag = None
 140
 141    @property
 142    def value(self):
 143        return self._value
 144
 145    @property
 146    def dvalue(self):
 147        return self._dvalue
 148
 149    @property
 150    def e_names(self):
 151        return sorted(set([o.split('|')[0] for o in self.names]))
 152
 153    @property
 154    def cov_names(self):
 155        return sorted(set([o for o in self.covobs.keys()]))
 156
 157    @property
 158    def mc_names(self):
 159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 160
 161    @property
 162    def e_content(self):
 163        res = {}
 164        for e, e_name in enumerate(self.e_names):
 165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 166            if e_name in self.names:
 167                res[e_name].append(e_name)
 168        return res
 169
 170    @property
 171    def covobs(self):
 172        return self._covobs
 173
 174    def gamma_method(self, **kwargs):
 175        """Estimate the error and related properties of the Obs.
 176
 177        Parameters
 178        ----------
 179        S : float
 180            specifies a custom value for the parameter S (default 2.0).
 181            If set to 0 it is assumed that the data exhibits no
 182            autocorrelation. In this case the error estimates coincides
 183            with the sample standard error.
 184        tau_exp : float
 185            positive value triggers the critical slowing down analysis
 186            (default 0.0).
 187        N_sigma : float
 188            number of standard deviations from zero until the tail is
 189            attached to the autocorrelation function (default 1).
 190        fft : bool
 191            determines whether the fft algorithm is used for the computation
 192            of the autocorrelation function (default True)
 193        """
 194
 195        e_content = self.e_content
 196        self.e_dvalue = {}
 197        self.e_ddvalue = {}
 198        self.e_tauint = {}
 199        self.e_dtauint = {}
 200        self.e_windowsize = {}
 201        self.e_n_tauint = {}
 202        self.e_n_dtauint = {}
 203        e_gamma = {}
 204        self.e_rho = {}
 205        self.e_drho = {}
 206        self._dvalue = 0
 207        self.ddvalue = 0
 208
 209        self.S = {}
 210        self.tau_exp = {}
 211        self.N_sigma = {}
 212
 213        if kwargs.get('fft') is False:
 214            fft = False
 215        else:
 216            fft = True
 217
 218        def _parse_kwarg(kwarg_name):
 219            if kwarg_name in kwargs:
 220                tmp = kwargs.get(kwarg_name)
 221                if isinstance(tmp, (int, float)):
 222                    if tmp < 0:
 223                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 224                    for e, e_name in enumerate(self.e_names):
 225                        getattr(self, kwarg_name)[e_name] = tmp
 226                else:
 227                    raise TypeError(kwarg_name + ' is not in proper format.')
 228            else:
 229                for e, e_name in enumerate(self.e_names):
 230                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 231                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 232                    else:
 233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 234
 235        _parse_kwarg('S')
 236        _parse_kwarg('tau_exp')
 237        _parse_kwarg('N_sigma')
 238
 239        for e, e_name in enumerate(self.mc_names):
 240            r_length = []
 241            for r_name in e_content[e_name]:
 242                if isinstance(self.idl[r_name], range):
 243                    r_length.append(len(self.idl[r_name]))
 244                else:
 245                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 246
 247            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 248            w_max = max(r_length) // 2
 249            e_gamma[e_name] = np.zeros(w_max)
 250            self.e_rho[e_name] = np.zeros(w_max)
 251            self.e_drho[e_name] = np.zeros(w_max)
 252
 253            for r_name in e_content[e_name]:
 254                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 255
 256            gamma_div = np.zeros(w_max)
 257            for r_name in e_content[e_name]:
 258                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 259            gamma_div[gamma_div < 1] = 1.0
 260            e_gamma[e_name] /= gamma_div[:w_max]
 261
 262            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 263                self.e_tauint[e_name] = 0.5
 264                self.e_dtauint[e_name] = 0.0
 265                self.e_dvalue[e_name] = 0.0
 266                self.e_ddvalue[e_name] = 0.0
 267                self.e_windowsize[e_name] = 0
 268                continue
 269
 270            gaps = []
 271            for r_name in e_content[e_name]:
 272                if isinstance(self.idl[r_name], range):
 273                    gaps.append(1)
 274                else:
 275                    gaps.append(np.min(np.diff(self.idl[r_name])))
 276
 277            if not np.all([gi == gaps[0] for gi in gaps]):
 278                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
 279            else:
 280                gapsize = gaps[0]
 281
 282            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 283            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 284            # Make sure no entry of tauint is smaller than 0.5
 285            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 286            # hep-lat/0306017 eq. (42)
 287            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
 288            self.e_n_dtauint[e_name][0] = 0.0
 289
 290            def _compute_drho(i):
 291                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 292                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 293
 294            _compute_drho(gapsize)
 295            if self.tau_exp[e_name] > 0:
 296                texp = self.tau_exp[e_name]
 297                # Critical slowing down analysis
 298                if w_max // 2 <= 1:
 299                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 300                for n in range(gapsize, w_max // 2, gapsize):
 301                    _compute_drho(n + gapsize)
 302                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 303                        # Bias correction hep-lat/0306017 eq. (49) included
 304                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 305                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 306                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 307                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 308                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 309                        self.e_windowsize[e_name] = n
 310                        break
 311            else:
 312                if self.S[e_name] == 0.0:
 313                    self.e_tauint[e_name] = 0.5
 314                    self.e_dtauint[e_name] = 0.0
 315                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 316                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 317                    self.e_windowsize[e_name] = 0
 318                else:
 319                    # Standard automatic windowing procedure
 320                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
 321                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 322                    for n in range(1, w_max):
 323                        if n < w_max // 2 - 2:
 324                            _compute_drho(gapsize * n + gapsize)
 325                        if g_w[n - 1] < 0 or n >= w_max - 1:
 326                            n *= gapsize
 327                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 328                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 329                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 330                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 331                            self.e_windowsize[e_name] = n
 332                            break
 333
 334            self._dvalue += self.e_dvalue[e_name] ** 2
 335            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 336
 337        for e_name in self.cov_names:
 338            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 339            self.e_ddvalue[e_name] = 0
 340            self._dvalue += self.e_dvalue[e_name]**2
 341
 342        self._dvalue = np.sqrt(self._dvalue)
 343        if self._dvalue == 0.0:
 344            self.ddvalue = 0.0
 345        else:
 346            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 347        return
 348
 349    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 350        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 351           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 352
 353        Parameters
 354        ----------
 355        deltas : list
 356            List of fluctuations
 357        idx : list
 358            List or range of configurations on which the deltas are defined.
 359        shape : int
 360            Number of configurations in idx.
 361        w_max : int
 362            Upper bound for the summation window.
 363        fft : bool
 364            determines whether the fft algorithm is used for the computation
 365            of the autocorrelation function.
 366        """
 367        gamma = np.zeros(w_max)
 368        deltas = _expand_deltas(deltas, idx, shape)
 369        new_shape = len(deltas)
 370        if fft:
 371            max_gamma = min(new_shape, w_max)
 372            # The padding for the fft has to be even
 373            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 374            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 375        else:
 376            for n in range(w_max):
 377                if new_shape - n >= 0:
 378                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 379
 380        return gamma
 381
 382    def details(self, ens_content=True):
 383        """Output detailed properties of the Obs.
 384
 385        Parameters
 386        ----------
 387        ens_content : bool
 388            print details about the ensembles and replica if true.
 389        """
 390        if self.tag is not None:
 391            print("Description:", self.tag)
 392        if not hasattr(self, 'e_dvalue'):
 393            print('Result\t %3.8e' % (self.value))
 394        else:
 395            if self.value == 0.0:
 396                percentage = np.nan
 397            else:
 398                percentage = np.abs(self._dvalue / self.value) * 100
 399            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 400            if len(self.e_names) > 1:
 401                print(' Ensemble errors:')
 402            e_content = self.e_content
 403            for e_name in self.mc_names:
 404                if isinstance(self.idl[e_content[e_name][0]], range):
 405                    gap = self.idl[e_content[e_name][0]].step
 406                else:
 407                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 408
 409                if len(self.e_names) > 1:
 410                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 411                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 412                tau_string += f" in units of {gap} config"
 413                if gap > 1:
 414                    tau_string += "s"
 415                if self.tau_exp[e_name] > 0:
 416                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 417                else:
 418                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 419                print(tau_string)
 420            for e_name in self.cov_names:
 421                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 422        if ens_content is True:
 423            if len(self.e_names) == 1:
 424                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 425            else:
 426                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 427            my_string_list = []
 428            for key, value in sorted(self.e_content.items()):
 429                if key not in self.covobs:
 430                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 431                    if len(value) == 1:
 432                        my_string += f': {self.shape[value[0]]} configurations'
 433                        if isinstance(self.idl[value[0]], range):
 434                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 435                        else:
 436                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 437                    else:
 438                        sublist = []
 439                        for v in value:
 440                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 441                            my_substring += f': {self.shape[v]} configurations'
 442                            if isinstance(self.idl[v], range):
 443                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 444                            else:
 445                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 446                            sublist.append(my_substring)
 447
 448                        my_string += '\n' + '\n'.join(sublist)
 449                else:
 450                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 451                my_string_list.append(my_string)
 452            print('\n'.join(my_string_list))
 453
 454    def reweight(self, weight):
 455        """Reweight the obs with given rewighting factors.
 456
 457        Parameters
 458        ----------
 459        weight : Obs
 460            Reweighting factor. An Observable that has to be defined on a superset of the
 461            configurations in obs[i].idl for all i.
 462        all_configs : bool
 463            if True, the reweighted observables are normalized by the average of
 464            the reweighting factor on all configurations in weight.idl and not
 465            on the configurations in obs[i].idl. Default False.
 466        """
 467        return reweight(weight, [self])[0]
 468
 469    def is_zero_within_error(self, sigma=1):
 470        """Checks whether the observable is zero within 'sigma' standard errors.
 471
 472        Parameters
 473        ----------
 474        sigma : int
 475            Number of standard errors used for the check.
 476
 477        Works only properly when the gamma method was run.
 478        """
 479        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 480
 481    def is_zero(self, atol=1e-10):
 482        """Checks whether the observable is zero within a given tolerance.
 483
 484        Parameters
 485        ----------
 486        atol : float
 487            Absolute tolerance (for details see numpy documentation).
 488        """
 489        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 490
 491    def plot_tauint(self, save=None):
 492        """Plot integrated autocorrelation time for each ensemble.
 493
 494        Parameters
 495        ----------
 496        save : str
 497            saves the figure to a file named 'save' if.
 498        """
 499        if not hasattr(self, 'e_dvalue'):
 500            raise Exception('Run the gamma method first.')
 501
 502        for e, e_name in enumerate(self.mc_names):
 503            fig = plt.figure()
 504            plt.xlabel(r'$W$')
 505            plt.ylabel(r'$\tau_\mathrm{int}$')
 506            length = int(len(self.e_n_tauint[e_name]))
 507            if self.tau_exp[e_name] > 0:
 508                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 509                x_help = np.arange(2 * self.tau_exp[e_name])
 510                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 511                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 512                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 513                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 514                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 515                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 516                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 517            else:
 518                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 519                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 520
 521            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 522            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 523            plt.legend()
 524            plt.xlim(-0.5, xmax)
 525            ylim = plt.ylim()
 526            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 527            plt.draw()
 528            if save:
 529                fig.savefig(save + "_" + str(e))
 530
 531    def plot_rho(self, save=None):
 532        """Plot normalized autocorrelation function time for each ensemble.
 533
 534        Parameters
 535        ----------
 536        save : str
 537            saves the figure to a file named 'save' if.
 538        """
 539        if not hasattr(self, 'e_dvalue'):
 540            raise Exception('Run the gamma method first.')
 541        for e, e_name in enumerate(self.mc_names):
 542            fig = plt.figure()
 543            plt.xlabel('W')
 544            plt.ylabel('rho')
 545            length = int(len(self.e_drho[e_name]))
 546            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 547            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 548            if self.tau_exp[e_name] > 0:
 549                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 550                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 551                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 552                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 553            else:
 554                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 555                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 556            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 557            plt.xlim(-0.5, xmax)
 558            plt.draw()
 559            if save:
 560                fig.savefig(save + "_" + str(e))
 561
 562    def plot_rep_dist(self):
 563        """Plot replica distribution for each ensemble with more than one replicum."""
 564        if not hasattr(self, 'e_dvalue'):
 565            raise Exception('Run the gamma method first.')
 566        for e, e_name in enumerate(self.mc_names):
 567            if len(self.e_content[e_name]) == 1:
 568                print('No replica distribution for a single replicum (', e_name, ')')
 569                continue
 570            r_length = []
 571            sub_r_mean = 0
 572            for r, r_name in enumerate(self.e_content[e_name]):
 573                r_length.append(len(self.deltas[r_name]))
 574                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 575            e_N = np.sum(r_length)
 576            sub_r_mean /= e_N
 577            arr = np.zeros(len(self.e_content[e_name]))
 578            for r, r_name in enumerate(self.e_content[e_name]):
 579                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 580            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 581            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 582            plt.draw()
 583
 584    def plot_history(self, expand=True):
 585        """Plot derived Monte Carlo history for each ensemble
 586
 587        Parameters
 588        ----------
 589        expand : bool
 590            show expanded history for irregular Monte Carlo chains (default: True).
 591        """
 592        for e, e_name in enumerate(self.mc_names):
 593            plt.figure()
 594            r_length = []
 595            tmp = []
 596            tmp_expanded = []
 597            for r, r_name in enumerate(self.e_content[e_name]):
 598                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 599                if expand:
 600                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 601                    r_length.append(len(tmp_expanded[-1]))
 602                else:
 603                    r_length.append(len(tmp[-1]))
 604            e_N = np.sum(r_length)
 605            x = np.arange(e_N)
 606            y_test = np.concatenate(tmp, axis=0)
 607            if expand:
 608                y = np.concatenate(tmp_expanded, axis=0)
 609            else:
 610                y = y_test
 611            plt.errorbar(x, y, fmt='.', markersize=3)
 612            plt.xlim(-0.5, e_N - 0.5)
 613            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 614            plt.draw()
 615
 616    def plot_piechart(self, save=None):
 617        """Plot piechart which shows the fractional contribution of each
 618        ensemble to the error and returns a dictionary containing the fractions.
 619
 620        Parameters
 621        ----------
 622        save : str
 623            saves the figure to a file named 'save' if.
 624        """
 625        if not hasattr(self, 'e_dvalue'):
 626            raise Exception('Run the gamma method first.')
 627        if np.isclose(0.0, self._dvalue, atol=1e-15):
 628            raise Exception('Error is 0.0')
 629        labels = self.e_names
 630        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 631        fig1, ax1 = plt.subplots()
 632        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 633        ax1.axis('equal')
 634        plt.draw()
 635        if save:
 636            fig1.savefig(save)
 637
 638        return dict(zip(self.e_names, sizes))
 639
 640    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 641        """Dump the Obs to a file 'name' of chosen format.
 642
 643        Parameters
 644        ----------
 645        filename : str
 646            name of the file to be saved.
 647        datatype : str
 648            Format of the exported file. Supported formats include
 649            "json.gz" and "pickle"
 650        description : str
 651            Description for output file, only relevant for json.gz format.
 652        path : str
 653            specifies a custom path for the file (default '.')
 654        """
 655        if 'path' in kwargs:
 656            file_name = kwargs.get('path') + '/' + filename
 657        else:
 658            file_name = filename
 659
 660        if datatype == "json.gz":
 661            from .input.json import dump_to_json
 662            dump_to_json([self], file_name, description=description)
 663        elif datatype == "pickle":
 664            with open(file_name + '.p', 'wb') as fb:
 665                pickle.dump(self, fb)
 666        else:
 667            raise Exception("Unknown datatype " + str(datatype))
 668
 669    def export_jackknife(self):
 670        """Export jackknife samples from the Obs
 671
 672        Returns
 673        -------
 674        numpy.ndarray
 675            Returns a numpy array of length N + 1 where N is the number of samples
 676            for the given ensemble and replicum. The zeroth entry of the array contains
 677            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 678            derived from the Obs. The current implementation only works for observables
 679            defined on exactly one ensemble and replicum. The derived jackknife samples
 680            should agree with samples from a full jackknife analysis up to O(1/N).
 681        """
 682
 683        if len(self.names) != 1:
 684            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 685
 686        name = self.names[0]
 687        full_data = self.deltas[name] + self.r_values[name]
 688        n = full_data.size
 689        mean = self.value
 690        tmp_jacks = np.zeros(n + 1)
 691        tmp_jacks[0] = mean
 692        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 693        return tmp_jacks
 694
 695    def __float__(self):
 696        return float(self.value)
 697
 698    def __repr__(self):
 699        return 'Obs[' + str(self) + ']'
 700
 701    def __str__(self):
 702        return _format_uncertainty(self.value, self._dvalue)
 703
 704    def __hash__(self):
 705        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 706        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 707        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 708        hash_tuple += tuple([o.encode() for o in self.names])
 709        m = hashlib.md5()
 710        [m.update(o) for o in hash_tuple]
 711        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 712
 713    # Overload comparisons
 714    def __lt__(self, other):
 715        return self.value < other
 716
 717    def __le__(self, other):
 718        return self.value <= other
 719
 720    def __gt__(self, other):
 721        return self.value > other
 722
 723    def __ge__(self, other):
 724        return self.value >= other
 725
 726    def __eq__(self, other):
 727        return (self - other).is_zero()
 728
 729    def __ne__(self, other):
 730        return not (self - other).is_zero()
 731
 732    # Overload math operations
 733    def __add__(self, y):
 734        if isinstance(y, Obs):
 735            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 736        else:
 737            if isinstance(y, np.ndarray):
 738                return np.array([self + o for o in y])
 739            elif y.__class__.__name__ in ['Corr', 'CObs']:
 740                return NotImplemented
 741            else:
 742                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 743
 744    def __radd__(self, y):
 745        return self + y
 746
 747    def __mul__(self, y):
 748        if isinstance(y, Obs):
 749            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 750        else:
 751            if isinstance(y, np.ndarray):
 752                return np.array([self * o for o in y])
 753            elif isinstance(y, complex):
 754                return CObs(self * y.real, self * y.imag)
 755            elif y.__class__.__name__ in ['Corr', 'CObs']:
 756                return NotImplemented
 757            else:
 758                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 759
 760    def __rmul__(self, y):
 761        return self * y
 762
 763    def __sub__(self, y):
 764        if isinstance(y, Obs):
 765            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 766        else:
 767            if isinstance(y, np.ndarray):
 768                return np.array([self - o for o in y])
 769            elif y.__class__.__name__ in ['Corr', 'CObs']:
 770                return NotImplemented
 771            else:
 772                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 773
 774    def __rsub__(self, y):
 775        return -1 * (self - y)
 776
 777    def __pos__(self):
 778        return self
 779
 780    def __neg__(self):
 781        return -1 * self
 782
 783    def __truediv__(self, y):
 784        if isinstance(y, Obs):
 785            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 786        else:
 787            if isinstance(y, np.ndarray):
 788                return np.array([self / o for o in y])
 789            elif y.__class__.__name__ in ['Corr', 'CObs']:
 790                return NotImplemented
 791            else:
 792                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 793
 794    def __rtruediv__(self, y):
 795        if isinstance(y, Obs):
 796            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 797        else:
 798            if isinstance(y, np.ndarray):
 799                return np.array([o / self for o in y])
 800            elif y.__class__.__name__ in ['Corr', 'CObs']:
 801                return NotImplemented
 802            else:
 803                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 804
 805    def __pow__(self, y):
 806        if isinstance(y, Obs):
 807            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 808        else:
 809            return derived_observable(lambda x: x[0] ** y, [self])
 810
 811    def __rpow__(self, y):
 812        if isinstance(y, Obs):
 813            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 814        else:
 815            return derived_observable(lambda x: y ** x[0], [self])
 816
 817    def __abs__(self):
 818        return derived_observable(lambda x: anp.abs(x[0]), [self])
 819
 820    # Overload numpy functions
 821    def sqrt(self):
 822        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 823
 824    def log(self):
 825        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 826
 827    def exp(self):
 828        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 829
 830    def sin(self):
 831        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 832
 833    def cos(self):
 834        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 835
 836    def tan(self):
 837        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 838
 839    def arcsin(self):
 840        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 841
 842    def arccos(self):
 843        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 844
 845    def arctan(self):
 846        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 847
 848    def sinh(self):
 849        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 850
 851    def cosh(self):
 852        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 853
 854    def tanh(self):
 855        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 856
 857    def arcsinh(self):
 858        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 859
 860    def arccosh(self):
 861        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 862
 863    def arctanh(self):
 864        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 865
 866
 867class CObs:
 868    """Class for a complex valued observable."""
 869    __slots__ = ['_real', '_imag', 'tag']
 870
 871    def __init__(self, real, imag=0.0):
 872        self._real = real
 873        self._imag = imag
 874        self.tag = None
 875
 876    @property
 877    def real(self):
 878        return self._real
 879
 880    @property
 881    def imag(self):
 882        return self._imag
 883
 884    def gamma_method(self, **kwargs):
 885        """Executes the gamma_method for the real and the imaginary part."""
 886        if isinstance(self.real, Obs):
 887            self.real.gamma_method(**kwargs)
 888        if isinstance(self.imag, Obs):
 889            self.imag.gamma_method(**kwargs)
 890
 891    def is_zero(self):
 892        """Checks whether both real and imaginary part are zero within machine precision."""
 893        return self.real == 0.0 and self.imag == 0.0
 894
 895    def conjugate(self):
 896        return CObs(self.real, -self.imag)
 897
 898    def __add__(self, other):
 899        if isinstance(other, np.ndarray):
 900            return other + self
 901        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 902            return CObs(self.real + other.real,
 903                        self.imag + other.imag)
 904        else:
 905            return CObs(self.real + other, self.imag)
 906
 907    def __radd__(self, y):
 908        return self + y
 909
 910    def __sub__(self, other):
 911        if isinstance(other, np.ndarray):
 912            return -1 * (other - self)
 913        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 914            return CObs(self.real - other.real, self.imag - other.imag)
 915        else:
 916            return CObs(self.real - other, self.imag)
 917
 918    def __rsub__(self, other):
 919        return -1 * (self - other)
 920
 921    def __mul__(self, other):
 922        if isinstance(other, np.ndarray):
 923            return other * self
 924        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 925            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 926                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 927                                               [self.real, other.real, self.imag, other.imag],
 928                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 929                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 930                                               [self.real, other.real, self.imag, other.imag],
 931                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 932            elif getattr(other, 'imag', 0) != 0:
 933                return CObs(self.real * other.real - self.imag * other.imag,
 934                            self.imag * other.real + self.real * other.imag)
 935            else:
 936                return CObs(self.real * other.real, self.imag * other.real)
 937        else:
 938            return CObs(self.real * other, self.imag * other)
 939
 940    def __rmul__(self, other):
 941        return self * other
 942
 943    def __truediv__(self, other):
 944        if isinstance(other, np.ndarray):
 945            return 1 / (other / self)
 946        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 947            r = other.real ** 2 + other.imag ** 2
 948            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 949        else:
 950            return CObs(self.real / other, self.imag / other)
 951
 952    def __rtruediv__(self, other):
 953        r = self.real ** 2 + self.imag ** 2
 954        if hasattr(other, 'real') and hasattr(other, 'imag'):
 955            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 956        else:
 957            return CObs(self.real * other / r, -self.imag * other / r)
 958
 959    def __abs__(self):
 960        return np.sqrt(self.real**2 + self.imag**2)
 961
 962    def __pos__(self):
 963        return self
 964
 965    def __neg__(self):
 966        return -1 * self
 967
 968    def __eq__(self, other):
 969        return self.real == other.real and self.imag == other.imag
 970
 971    def __str__(self):
 972        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 973
 974    def __repr__(self):
 975        return 'CObs[' + str(self) + ']'
 976
 977
 978def _format_uncertainty(value, dvalue):
 979    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 980    if dvalue == 0.0:
 981        return str(value)
 982    fexp = np.floor(np.log10(dvalue))
 983    if fexp < 0.0:
 984        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 985    elif fexp == 0.0:
 986        return '{:.1f}({:1.1f})'.format(value, dvalue)
 987    else:
 988        return '{:.0f}({:2.0f})'.format(value, dvalue)
 989
 990
 991def _expand_deltas(deltas, idx, shape):
 992    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 993       If idx is of type range, the deltas are not changed
 994
 995    Parameters
 996    ----------
 997    deltas : list
 998        List of fluctuations
 999    idx : list
1000        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1001    shape : int
1002        Number of configs in idx.
1003    """
1004    if isinstance(idx, range):
1005        return deltas
1006    else:
1007        ret = np.zeros(idx[-1] - idx[0] + 1)
1008        for i in range(shape):
1009            ret[idx[i] - idx[0]] = deltas[i]
1010        return ret
1011
1012
1013def _merge_idx(idl):
1014    """Returns the union of all lists in idl as sorted list
1015
1016    Parameters
1017    ----------
1018    idl : list
1019        List of lists or ranges.
1020    """
1021
1022    # Use groupby to efficiently check whether all elements of idl are identical
1023    try:
1024        g = groupby(idl)
1025        if next(g, True) and not next(g, False):
1026            return idl[0]
1027    except Exception:
1028        pass
1029
1030    if np.all([type(idx) is range for idx in idl]):
1031        if len(set([idx[0] for idx in idl])) == 1:
1032            idstart = min([idx.start for idx in idl])
1033            idstop = max([idx.stop for idx in idl])
1034            idstep = min([idx.step for idx in idl])
1035            return range(idstart, idstop, idstep)
1036
1037    return sorted(set().union(*idl))
1038
1039
1040def _intersection_idx(idl):
1041    """Returns the intersection of all lists in idl as sorted list
1042
1043    Parameters
1044    ----------
1045    idl : list
1046        List of lists or ranges.
1047    """
1048
1049    def _lcm(*args):
1050        """Returns the lowest common multiple of args.
1051
1052        From python 3.9 onwards the math library contains an lcm function."""
1053        return reduce(lambda a, b: a * b // gcd(a, b), args)
1054
1055    # Use groupby to efficiently check whether all elements of idl are identical
1056    try:
1057        g = groupby(idl)
1058        if next(g, True) and not next(g, False):
1059            return idl[0]
1060    except Exception:
1061        pass
1062
1063    if np.all([type(idx) is range for idx in idl]):
1064        if len(set([idx[0] for idx in idl])) == 1:
1065            idstart = max([idx.start for idx in idl])
1066            idstop = min([idx.stop for idx in idl])
1067            idstep = _lcm(*[idx.step for idx in idl])
1068            return range(idstart, idstop, idstep)
1069
1070    return sorted(set.intersection(*[set(o) for o in idl]))
1071
1072
1073def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1074    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1075       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1076       common divisor of the step sizes is used as new step size.
1077
1078    Parameters
1079    ----------
1080    deltas : list
1081        List of fluctuations
1082    idx : list
1083        List or range of configs on which the deltas are defined.
1084        Has to be a subset of new_idx and has to be sorted in ascending order.
1085    shape : list
1086        Number of configs in idx.
1087    new_idx : list
1088        List of configs that defines the new range, has to be sorted in ascending order.
1089    """
1090
1091    if type(idx) is range and type(new_idx) is range:
1092        if idx == new_idx:
1093            return deltas
1094    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1095    for i in range(shape):
1096        ret[idx[i] - new_idx[0]] = deltas[i]
1097    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1098
1099
1100def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1101    """Filter out all configurations with vanishing fluctuation such that they do not
1102       contribute to the error estimate anymore. Returns the new deltas and
1103       idx according to the filtering.
1104       A fluctuation is considered to be vanishing, if it is smaller than eps times
1105       the mean of the absolute values of all deltas in one list.
1106
1107    Parameters
1108    ----------
1109    deltas : list
1110        List of fluctuations
1111    idx : list
1112        List or ranges of configs on which the deltas are defined.
1113    eps : float
1114        Prefactor that enters the filter criterion.
1115    """
1116    new_deltas = []
1117    new_idx = []
1118    maxd = np.mean(np.fabs(deltas))
1119    for i in range(len(deltas)):
1120        if abs(deltas[i]) > eps * maxd:
1121            new_deltas.append(deltas[i])
1122            new_idx.append(idx[i])
1123    if new_idx:
1124        return np.array(new_deltas), new_idx
1125    else:
1126        return deltas, idx
1127
1128
1129def derived_observable(func, data, array_mode=False, **kwargs):
1130    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1131
1132    Parameters
1133    ----------
1134    func : object
1135        arbitrary function of the form func(data, **kwargs). For the
1136        automatic differentiation to work, all numpy functions have to have
1137        the autograd wrapper (use 'import autograd.numpy as anp').
1138    data : list
1139        list of Obs, e.g. [obs1, obs2, obs3].
1140    num_grad : bool
1141        if True, numerical derivatives are used instead of autograd
1142        (default False). To control the numerical differentiation the
1143        kwargs of numdifftools.step_generators.MaxStepGenerator
1144        can be used.
1145    man_grad : list
1146        manually supply a list or an array which contains the jacobian
1147        of func. Use cautiously, supplying the wrong derivative will
1148        not be intercepted.
1149
1150    Notes
1151    -----
1152    For simple mathematical operations it can be practical to use anonymous
1153    functions. For the ratio of two observables one can e.g. use
1154
1155    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1156    """
1157
1158    data = np.asarray(data)
1159    raveled_data = data.ravel()
1160
1161    # Workaround for matrix operations containing non Obs data
1162    if not all(isinstance(x, Obs) for x in raveled_data):
1163        for i in range(len(raveled_data)):
1164            if isinstance(raveled_data[i], (int, float)):
1165                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1166
1167    allcov = {}
1168    for o in raveled_data:
1169        for name in o.cov_names:
1170            if name in allcov:
1171                if not np.allclose(allcov[name], o.covobs[name].cov):
1172                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1173            else:
1174                allcov[name] = o.covobs[name].cov
1175
1176    n_obs = len(raveled_data)
1177    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1178    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1179    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1180
1181    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1182    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1183
1184    if data.ndim == 1:
1185        values = np.array([o.value for o in data])
1186    else:
1187        values = np.vectorize(lambda x: x.value)(data)
1188
1189    new_values = func(values, **kwargs)
1190
1191    multi = int(isinstance(new_values, np.ndarray))
1192
1193    new_r_values = {}
1194    new_idl_d = {}
1195    for name in new_sample_names:
1196        idl = []
1197        tmp_values = np.zeros(n_obs)
1198        for i, item in enumerate(raveled_data):
1199            tmp_values[i] = item.r_values.get(name, item.value)
1200            tmp_idl = item.idl.get(name)
1201            if tmp_idl is not None:
1202                idl.append(tmp_idl)
1203        if multi > 0:
1204            tmp_values = np.array(tmp_values).reshape(data.shape)
1205        new_r_values[name] = func(tmp_values, **kwargs)
1206        new_idl_d[name] = _merge_idx(idl)
1207        if not is_merged[name]:
1208            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1209
1210    if 'man_grad' in kwargs:
1211        deriv = np.asarray(kwargs.get('man_grad'))
1212        if new_values.shape + data.shape != deriv.shape:
1213            raise Exception('Manual derivative does not have correct shape.')
1214    elif kwargs.get('num_grad') is True:
1215        if multi > 0:
1216            raise Exception('Multi mode currently not supported for numerical derivative')
1217        options = {
1218            'base_step': 0.1,
1219            'step_ratio': 2.5}
1220        for key in options.keys():
1221            kwarg = kwargs.get(key)
1222            if kwarg is not None:
1223                options[key] = kwarg
1224        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1225        if tmp_df.size == 1:
1226            deriv = np.array([tmp_df.real])
1227        else:
1228            deriv = tmp_df.real
1229    else:
1230        deriv = jacobian(func)(values, **kwargs)
1231
1232    final_result = np.zeros(new_values.shape, dtype=object)
1233
1234    if array_mode is True:
1235
1236        class _Zero_grad():
1237            def __init__(self, N):
1238                self.grad = np.zeros((N, 1))
1239
1240        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1241        d_extracted = {}
1242        g_extracted = {}
1243        for name in new_sample_names:
1244            d_extracted[name] = []
1245            ens_length = len(new_idl_d[name])
1246            for i_dat, dat in enumerate(data):
1247                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1248        for name in new_cov_names:
1249            g_extracted[name] = []
1250            zero_grad = _Zero_grad(new_covobs_lengths[name])
1251            for i_dat, dat in enumerate(data):
1252                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1253
1254    for i_val, new_val in np.ndenumerate(new_values):
1255        new_deltas = {}
1256        new_grad = {}
1257        if array_mode is True:
1258            for name in new_sample_names:
1259                ens_length = d_extracted[name][0].shape[-1]
1260                new_deltas[name] = np.zeros(ens_length)
1261                for i_dat, dat in enumerate(d_extracted[name]):
1262                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1263            for name in new_cov_names:
1264                new_grad[name] = 0
1265                for i_dat, dat in enumerate(g_extracted[name]):
1266                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1267        else:
1268            for j_obs, obs in np.ndenumerate(data):
1269                for name in obs.names:
1270                    if name in obs.cov_names:
1271                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1272                    else:
1273                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1274
1275        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1276
1277        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1278            raise Exception('The same name has been used for deltas and covobs!')
1279        new_samples = []
1280        new_means = []
1281        new_idl = []
1282        new_names_obs = []
1283        for name in new_names:
1284            if name not in new_covobs:
1285                if is_merged[name]:
1286                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1287                else:
1288                    filtered_deltas = new_deltas[name]
1289                    filtered_idl_d = new_idl_d[name]
1290
1291                new_samples.append(filtered_deltas)
1292                new_idl.append(filtered_idl_d)
1293                new_means.append(new_r_values[name][i_val])
1294                new_names_obs.append(name)
1295        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1296        for name in new_covobs:
1297            final_result[i_val].names.append(name)
1298        final_result[i_val]._covobs = new_covobs
1299        final_result[i_val]._value = new_val
1300        final_result[i_val].is_merged = is_merged
1301        final_result[i_val].reweighted = reweighted
1302
1303    if multi == 0:
1304        final_result = final_result.item()
1305
1306    return final_result
1307
1308
1309def _reduce_deltas(deltas, idx_old, idx_new):
1310    """Extract deltas defined on idx_old on all configs of idx_new.
1311
1312    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1313    are ordered in an ascending order.
1314
1315    Parameters
1316    ----------
1317    deltas : list
1318        List of fluctuations
1319    idx_old : list
1320        List or range of configs on which the deltas are defined
1321    idx_new : list
1322        List of configs for which we want to extract the deltas.
1323        Has to be a subset of idx_old.
1324    """
1325    if not len(deltas) == len(idx_old):
1326        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1327    if type(idx_old) is range and type(idx_new) is range:
1328        if idx_old == idx_new:
1329            return deltas
1330    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1331    try:
1332        g = groupby([idx_old, idx_new])
1333        if next(g, True) and not next(g, False):
1334            return deltas
1335    except Exception:
1336        pass
1337    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1338    if len(indices) < len(idx_new):
1339        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1340    return np.array(deltas)[indices]
1341
1342
1343def reweight(weight, obs, **kwargs):
1344    """Reweight a list of observables.
1345
1346    Parameters
1347    ----------
1348    weight : Obs
1349        Reweighting factor. An Observable that has to be defined on a superset of the
1350        configurations in obs[i].idl for all i.
1351    obs : list
1352        list of Obs, e.g. [obs1, obs2, obs3].
1353    all_configs : bool
1354        if True, the reweighted observables are normalized by the average of
1355        the reweighting factor on all configurations in weight.idl and not
1356        on the configurations in obs[i].idl. Default False.
1357    """
1358    result = []
1359    for i in range(len(obs)):
1360        if len(obs[i].cov_names):
1361            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1362        if not set(obs[i].names).issubset(weight.names):
1363            raise Exception('Error: Ensembles do not fit')
1364        for name in obs[i].names:
1365            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1366                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1367        new_samples = []
1368        w_deltas = {}
1369        for name in sorted(obs[i].names):
1370            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1371            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1372        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1373
1374        if kwargs.get('all_configs'):
1375            new_weight = weight
1376        else:
1377            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1378
1379        result.append(tmp_obs / new_weight)
1380        result[-1].reweighted = True
1381        result[-1].is_merged = obs[i].is_merged
1382
1383    return result
1384
1385
1386def correlate(obs_a, obs_b):
1387    """Correlate two observables.
1388
1389    Parameters
1390    ----------
1391    obs_a : Obs
1392        First observable
1393    obs_b : Obs
1394        Second observable
1395
1396    Notes
1397    -----
1398    Keep in mind to only correlate primary observables which have not been reweighted
1399    yet. The reweighting has to be applied after correlating the observables.
1400    Currently only works if ensembles are identical (this is not strictly necessary).
1401    """
1402
1403    if sorted(obs_a.names) != sorted(obs_b.names):
1404        raise Exception('Ensembles do not fit')
1405    if len(obs_a.cov_names) or len(obs_b.cov_names):
1406        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1407    for name in obs_a.names:
1408        if obs_a.shape[name] != obs_b.shape[name]:
1409            raise Exception('Shapes of ensemble', name, 'do not fit')
1410        if obs_a.idl[name] != obs_b.idl[name]:
1411            raise Exception('idl of ensemble', name, 'do not fit')
1412
1413    if obs_a.reweighted is True:
1414        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1415    if obs_b.reweighted is True:
1416        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1417
1418    new_samples = []
1419    new_idl = []
1420    for name in sorted(obs_a.names):
1421        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1422        new_idl.append(obs_a.idl[name])
1423
1424    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1425    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1426    o.reweighted = obs_a.reweighted or obs_b.reweighted
1427    return o
1428
1429
1430def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1431    r'''Calculates the error covariance matrix of a set of observables.
1432
1433    WARNING: This function should be used with care, especially for observables with support on multiple
1434             ensembles with differing autocorrelations. See the notes below for details.
1435
1436    The gamma method has to be applied first to all observables.
1437
1438    Parameters
1439    ----------
1440    obs : list or numpy.ndarray
1441        List or one dimensional array of Obs
1442    visualize : bool
1443        If True plots the corresponding normalized correlation matrix (default False).
1444    correlation : bool
1445        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1446    smooth : None or int
1447        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1448        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1449        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1450        small ones.
1451
1452    Notes
1453    -----
1454    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1455    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1456    in the absence of autocorrelation.
1457    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1458    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1459    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1460    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1461    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1462    '''
1463
1464    length = len(obs)
1465
1466    max_samples = np.max([o.N for o in obs])
1467    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1468        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1469
1470    cov = np.zeros((length, length))
1471    for i in range(length):
1472        for j in range(i, length):
1473            cov[i, j] = _covariance_element(obs[i], obs[j])
1474    cov = cov + cov.T - np.diag(np.diag(cov))
1475
1476    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1477
1478    if isinstance(smooth, int):
1479        corr = _smooth_eigenvalues(corr, smooth)
1480
1481    if visualize:
1482        plt.matshow(corr, vmin=-1, vmax=1)
1483        plt.set_cmap('RdBu')
1484        plt.colorbar()
1485        plt.draw()
1486
1487    if correlation is True:
1488        return corr
1489
1490    errors = [o.dvalue for o in obs]
1491    cov = np.diag(errors) @ corr @ np.diag(errors)
1492
1493    eigenvalues = np.linalg.eigh(cov)[0]
1494    if not np.all(eigenvalues >= 0):
1495        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1496
1497    return cov
1498
1499
1500def _smooth_eigenvalues(corr, E):
1501    """Eigenvalue smoothing as described in hep-lat/9412087
1502
1503    corr : np.ndarray
1504        correlation matrix
1505    E : integer
1506        Number of eigenvalues to be left substantially unchanged
1507    """
1508    if not (2 < E < corr.shape[0] - 1):
1509        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1510    vals, vec = np.linalg.eigh(corr)
1511    lambda_min = np.mean(vals[:-E])
1512    vals[vals < lambda_min] = lambda_min
1513    vals /= np.mean(vals)
1514    return vec @ np.diag(vals) @ vec.T
1515
1516
1517def _covariance_element(obs1, obs2):
1518    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1519
1520    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1521        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1522        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1523        return np.sum(deltas1 * deltas2)
1524
1525    if set(obs1.names).isdisjoint(set(obs2.names)):
1526        return 0.0
1527
1528    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1529        raise Exception('The gamma method has to be applied to both Obs first.')
1530
1531    dvalue = 0.0
1532
1533    for e_name in obs1.mc_names:
1534
1535        if e_name not in obs2.mc_names:
1536            continue
1537
1538        idl_d = {}
1539        for r_name in obs1.e_content[e_name]:
1540            if r_name not in obs2.e_content[e_name]:
1541                continue
1542            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1543
1544        gamma = 0.0
1545
1546        for r_name in obs1.e_content[e_name]:
1547            if r_name not in obs2.e_content[e_name]:
1548                continue
1549            if len(idl_d[r_name]) == 0:
1550                continue
1551            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1552
1553        if gamma == 0.0:
1554            continue
1555
1556        gamma_div = 0.0
1557        for r_name in obs1.e_content[e_name]:
1558            if r_name not in obs2.e_content[e_name]:
1559                continue
1560            if len(idl_d[r_name]) == 0:
1561                continue
1562            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1563        gamma /= gamma_div
1564
1565        dvalue += gamma
1566
1567    for e_name in obs1.cov_names:
1568
1569        if e_name not in obs2.cov_names:
1570            continue
1571
1572        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1573
1574    return dvalue
1575
1576
1577def import_jackknife(jacks, name, idl=None):
1578    """Imports jackknife samples and returns an Obs
1579
1580    Parameters
1581    ----------
1582    jacks : numpy.ndarray
1583        numpy array containing the mean value as zeroth entry and
1584        the N jackknife samples as first to Nth entry.
1585    name : str
1586        name of the ensemble the samples are defined on.
1587    """
1588    length = len(jacks) - 1
1589    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1590    samples = jacks[1:] @ prj
1591    mean = np.mean(samples)
1592    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1593    new_obs._value = jacks[0]
1594    return new_obs
1595
1596
1597def merge_obs(list_of_obs):
1598    """Combine all observables in list_of_obs into one new observable
1599
1600    Parameters
1601    ----------
1602    list_of_obs : list
1603        list of the Obs object to be combined
1604
1605    Notes
1606    -----
1607    It is not possible to combine obs which are based on the same replicum
1608    """
1609    replist = [item for obs in list_of_obs for item in obs.names]
1610    if (len(replist) == len(set(replist))) is False:
1611        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1612    if any([len(o.cov_names) for o in list_of_obs]):
1613        raise Exception('Not possible to merge data that contains covobs!')
1614    new_dict = {}
1615    idl_dict = {}
1616    for o in list_of_obs:
1617        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1618                        for key in set(o.deltas) | set(o.r_values)})
1619        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1620
1621    names = sorted(new_dict.keys())
1622    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1623    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1624    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1625    return o
1626
1627
1628def cov_Obs(means, cov, name, grad=None):
1629    """Create an Obs based on mean(s) and a covariance matrix
1630
1631    Parameters
1632    ----------
1633    mean : list of floats or float
1634        N mean value(s) of the new Obs
1635    cov : list or array
1636        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1637    name : str
1638        identifier for the covariance matrix
1639    grad : list or array
1640        Gradient of the Covobs wrt. the means belonging to cov.
1641    """
1642
1643    def covobs_to_obs(co):
1644        """Make an Obs out of a Covobs
1645
1646        Parameters
1647        ----------
1648        co : Covobs
1649            Covobs to be embedded into the Obs
1650        """
1651        o = Obs([], [], means=[])
1652        o._value = co.value
1653        o.names.append(co.name)
1654        o._covobs[co.name] = co
1655        o._dvalue = np.sqrt(co.errsq())
1656        return o
1657
1658    ol = []
1659    if isinstance(means, (float, int)):
1660        means = [means]
1661
1662    for i in range(len(means)):
1663        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1664    if ol[0].covobs[name].N != len(means):
1665        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1666    if len(ol) == 1:
1667        return ol[0]
1668    return ol

class CObs: View Source

868class CObs:
869    """Class for a complex valued observable."""
870    __slots__ = ['_real', '_imag', 'tag']
871
872    def __init__(self, real, imag=0.0):
873        self._real = real
874        self._imag = imag
875        self.tag = None
876
877    @property
878    def real(self):
879        return self._real
880
881    @property
882    def imag(self):
883        return self._imag
884
885    def gamma_method(self, **kwargs):
886        """Executes the gamma_method for the real and the imaginary part."""
887        if isinstance(self.real, Obs):
888            self.real.gamma_method(**kwargs)
889        if isinstance(self.imag, Obs):
890            self.imag.gamma_method(**kwargs)
891
892    def is_zero(self):
893        """Checks whether both real and imaginary part are zero within machine precision."""
894        return self.real == 0.0 and self.imag == 0.0
895
896    def conjugate(self):
897        return CObs(self.real, -self.imag)
898
899    def __add__(self, other):
900        if isinstance(other, np.ndarray):
901            return other + self
902        elif hasattr(other, 'real') and hasattr(other, 'imag'):
903            return CObs(self.real + other.real,
904                        self.imag + other.imag)
905        else:
906            return CObs(self.real + other, self.imag)
907
908    def __radd__(self, y):
909        return self + y
910
911    def __sub__(self, other):
912        if isinstance(other, np.ndarray):
913            return -1 * (other - self)
914        elif hasattr(other, 'real') and hasattr(other, 'imag'):
915            return CObs(self.real - other.real, self.imag - other.imag)
916        else:
917            return CObs(self.real - other, self.imag)
918
919    def __rsub__(self, other):
920        return -1 * (self - other)
921
922    def __mul__(self, other):
923        if isinstance(other, np.ndarray):
924            return other * self
925        elif hasattr(other, 'real') and hasattr(other, 'imag'):
926            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
927                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
928                                               [self.real, other.real, self.imag, other.imag],
929                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
930                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
931                                               [self.real, other.real, self.imag, other.imag],
932                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
933            elif getattr(other, 'imag', 0) != 0:
934                return CObs(self.real * other.real - self.imag * other.imag,
935                            self.imag * other.real + self.real * other.imag)
936            else:
937                return CObs(self.real * other.real, self.imag * other.real)
938        else:
939            return CObs(self.real * other, self.imag * other)
940
941    def __rmul__(self, other):
942        return self * other
943
944    def __truediv__(self, other):
945        if isinstance(other, np.ndarray):
946            return 1 / (other / self)
947        elif hasattr(other, 'real') and hasattr(other, 'imag'):
948            r = other.real ** 2 + other.imag ** 2
949            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
950        else:
951            return CObs(self.real / other, self.imag / other)
952
953    def __rtruediv__(self, other):
954        r = self.real ** 2 + self.imag ** 2
955        if hasattr(other, 'real') and hasattr(other, 'imag'):
956            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
957        else:
958            return CObs(self.real * other / r, -self.imag * other / r)
959
960    def __abs__(self):
961        return np.sqrt(self.real**2 + self.imag**2)
962
963    def __pos__(self):
964        return self
965
966    def __neg__(self):
967        return -1 * self
968
969    def __eq__(self, other):
970        return self.real == other.real and self.imag == other.imag
971
972    def __str__(self):
973        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
974
975    def __repr__(self):
976        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

872    def __init__(self, real, imag=0.0):
873        self._real = real
874        self._imag = imag
875        self.tag = None

def gamma_method(self, **kwargs): View Source

885    def gamma_method(self, **kwargs):
886        """Executes the gamma_method for the real and the imaginary part."""
887        if isinstance(self.real, Obs):
888            self.real.gamma_method(**kwargs)
889        if isinstance(self.imag, Obs):
890            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

892    def is_zero(self):
893        """Checks whether both real and imaginary part are zero within machine precision."""
894        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

896    def conjugate(self):
897        return CObs(self.real, -self.imag)

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1130def derived_observable(func, data, array_mode=False, **kwargs):
1131    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1132
1133    Parameters
1134    ----------
1135    func : object
1136        arbitrary function of the form func(data, **kwargs). For the
1137        automatic differentiation to work, all numpy functions have to have
1138        the autograd wrapper (use 'import autograd.numpy as anp').
1139    data : list
1140        list of Obs, e.g. [obs1, obs2, obs3].
1141    num_grad : bool
1142        if True, numerical derivatives are used instead of autograd
1143        (default False). To control the numerical differentiation the
1144        kwargs of numdifftools.step_generators.MaxStepGenerator
1145        can be used.
1146    man_grad : list
1147        manually supply a list or an array which contains the jacobian
1148        of func. Use cautiously, supplying the wrong derivative will
1149        not be intercepted.
1150
1151    Notes
1152    -----
1153    For simple mathematical operations it can be practical to use anonymous
1154    functions. For the ratio of two observables one can e.g. use
1155
1156    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1157    """
1158
1159    data = np.asarray(data)
1160    raveled_data = data.ravel()
1161
1162    # Workaround for matrix operations containing non Obs data
1163    if not all(isinstance(x, Obs) for x in raveled_data):
1164        for i in range(len(raveled_data)):
1165            if isinstance(raveled_data[i], (int, float)):
1166                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1167
1168    allcov = {}
1169    for o in raveled_data:
1170        for name in o.cov_names:
1171            if name in allcov:
1172                if not np.allclose(allcov[name], o.covobs[name].cov):
1173                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1174            else:
1175                allcov[name] = o.covobs[name].cov
1176
1177    n_obs = len(raveled_data)
1178    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1179    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1180    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1181
1182    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1183    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1184
1185    if data.ndim == 1:
1186        values = np.array([o.value for o in data])
1187    else:
1188        values = np.vectorize(lambda x: x.value)(data)
1189
1190    new_values = func(values, **kwargs)
1191
1192    multi = int(isinstance(new_values, np.ndarray))
1193
1194    new_r_values = {}
1195    new_idl_d = {}
1196    for name in new_sample_names:
1197        idl = []
1198        tmp_values = np.zeros(n_obs)
1199        for i, item in enumerate(raveled_data):
1200            tmp_values[i] = item.r_values.get(name, item.value)
1201            tmp_idl = item.idl.get(name)
1202            if tmp_idl is not None:
1203                idl.append(tmp_idl)
1204        if multi > 0:
1205            tmp_values = np.array(tmp_values).reshape(data.shape)
1206        new_r_values[name] = func(tmp_values, **kwargs)
1207        new_idl_d[name] = _merge_idx(idl)
1208        if not is_merged[name]:
1209            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1210
1211    if 'man_grad' in kwargs:
1212        deriv = np.asarray(kwargs.get('man_grad'))
1213        if new_values.shape + data.shape != deriv.shape:
1214            raise Exception('Manual derivative does not have correct shape.')
1215    elif kwargs.get('num_grad') is True:
1216        if multi > 0:
1217            raise Exception('Multi mode currently not supported for numerical derivative')
1218        options = {
1219            'base_step': 0.1,
1220            'step_ratio': 2.5}
1221        for key in options.keys():
1222            kwarg = kwargs.get(key)
1223            if kwarg is not None:
1224                options[key] = kwarg
1225        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1226        if tmp_df.size == 1:
1227            deriv = np.array([tmp_df.real])
1228        else:
1229            deriv = tmp_df.real
1230    else:
1231        deriv = jacobian(func)(values, **kwargs)
1232
1233    final_result = np.zeros(new_values.shape, dtype=object)
1234
1235    if array_mode is True:
1236
1237        class _Zero_grad():
1238            def __init__(self, N):
1239                self.grad = np.zeros((N, 1))
1240
1241        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1242        d_extracted = {}
1243        g_extracted = {}
1244        for name in new_sample_names:
1245            d_extracted[name] = []
1246            ens_length = len(new_idl_d[name])
1247            for i_dat, dat in enumerate(data):
1248                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1249        for name in new_cov_names:
1250            g_extracted[name] = []
1251            zero_grad = _Zero_grad(new_covobs_lengths[name])
1252            for i_dat, dat in enumerate(data):
1253                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1254
1255    for i_val, new_val in np.ndenumerate(new_values):
1256        new_deltas = {}
1257        new_grad = {}
1258        if array_mode is True:
1259            for name in new_sample_names:
1260                ens_length = d_extracted[name][0].shape[-1]
1261                new_deltas[name] = np.zeros(ens_length)
1262                for i_dat, dat in enumerate(d_extracted[name]):
1263                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1264            for name in new_cov_names:
1265                new_grad[name] = 0
1266                for i_dat, dat in enumerate(g_extracted[name]):
1267                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1268        else:
1269            for j_obs, obs in np.ndenumerate(data):
1270                for name in obs.names:
1271                    if name in obs.cov_names:
1272                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1273                    else:
1274                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1275
1276        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1277
1278        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1279            raise Exception('The same name has been used for deltas and covobs!')
1280        new_samples = []
1281        new_means = []
1282        new_idl = []
1283        new_names_obs = []
1284        for name in new_names:
1285            if name not in new_covobs:
1286                if is_merged[name]:
1287                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1288                else:
1289                    filtered_deltas = new_deltas[name]
1290                    filtered_idl_d = new_idl_d[name]
1291
1292                new_samples.append(filtered_deltas)
1293                new_idl.append(filtered_idl_d)
1294                new_means.append(new_r_values[name][i_val])
1295                new_names_obs.append(name)
1296        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1297        for name in new_covobs:
1298            final_result[i_val].names.append(name)
1299        final_result[i_val]._covobs = new_covobs
1300        final_result[i_val]._value = new_val
1301        final_result[i_val].is_merged = is_merged
1302        final_result[i_val].reweighted = reweighted
1303
1304    if multi == 0:
1305        final_result = final_result.item()
1306
1307    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

1344def reweight(weight, obs, **kwargs):
1345    """Reweight a list of observables.
1346
1347    Parameters
1348    ----------
1349    weight : Obs
1350        Reweighting factor. An Observable that has to be defined on a superset of the
1351        configurations in obs[i].idl for all i.
1352    obs : list
1353        list of Obs, e.g. [obs1, obs2, obs3].
1354    all_configs : bool
1355        if True, the reweighted observables are normalized by the average of
1356        the reweighting factor on all configurations in weight.idl and not
1357        on the configurations in obs[i].idl. Default False.
1358    """
1359    result = []
1360    for i in range(len(obs)):
1361        if len(obs[i].cov_names):
1362            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1363        if not set(obs[i].names).issubset(weight.names):
1364            raise Exception('Error: Ensembles do not fit')
1365        for name in obs[i].names:
1366            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1367                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1368        new_samples = []
1369        w_deltas = {}
1370        for name in sorted(obs[i].names):
1371            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1372            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1373        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1374
1375        if kwargs.get('all_configs'):
1376            new_weight = weight
1377        else:
1378            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1379
1380        result.append(tmp_obs / new_weight)
1381        result[-1].reweighted = True
1382        result[-1].is_merged = obs[i].is_merged
1383
1384    return result

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

1387def correlate(obs_a, obs_b):
1388    """Correlate two observables.
1389
1390    Parameters
1391    ----------
1392    obs_a : Obs
1393        First observable
1394    obs_b : Obs
1395        Second observable
1396
1397    Notes
1398    -----
1399    Keep in mind to only correlate primary observables which have not been reweighted
1400    yet. The reweighting has to be applied after correlating the observables.
1401    Currently only works if ensembles are identical (this is not strictly necessary).
1402    """
1403
1404    if sorted(obs_a.names) != sorted(obs_b.names):
1405        raise Exception('Ensembles do not fit')
1406    if len(obs_a.cov_names) or len(obs_b.cov_names):
1407        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1408    for name in obs_a.names:
1409        if obs_a.shape[name] != obs_b.shape[name]:
1410            raise Exception('Shapes of ensemble', name, 'do not fit')
1411        if obs_a.idl[name] != obs_b.idl[name]:
1412            raise Exception('idl of ensemble', name, 'do not fit')
1413
1414    if obs_a.reweighted is True:
1415        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1416    if obs_b.reweighted is True:
1417        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1418
1419    new_samples = []
1420    new_idl = []
1421    for name in sorted(obs_a.names):
1422        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1423        new_idl.append(obs_a.idl[name])
1424
1425    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1426    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1427    o.reweighted = obs_a.reweighted or obs_b.reweighted
1428    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

1431def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1432    r'''Calculates the error covariance matrix of a set of observables.
1433
1434    WARNING: This function should be used with care, especially for observables with support on multiple
1435             ensembles with differing autocorrelations. See the notes below for details.
1436
1437    The gamma method has to be applied first to all observables.
1438
1439    Parameters
1440    ----------
1441    obs : list or numpy.ndarray
1442        List or one dimensional array of Obs
1443    visualize : bool
1444        If True plots the corresponding normalized correlation matrix (default False).
1445    correlation : bool
1446        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1447    smooth : None or int
1448        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1449        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1450        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1451        small ones.
1452
1453    Notes
1454    -----
1455    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1456    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1457    in the absence of autocorrelation.
1458    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1459    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1460    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1461    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1462    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1463    '''
1464
1465    length = len(obs)
1466
1467    max_samples = np.max([o.N for o in obs])
1468    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1469        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1470
1471    cov = np.zeros((length, length))
1472    for i in range(length):
1473        for j in range(i, length):
1474            cov[i, j] = _covariance_element(obs[i], obs[j])
1475    cov = cov + cov.T - np.diag(np.diag(cov))
1476
1477    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1478
1479    if isinstance(smooth, int):
1480        corr = _smooth_eigenvalues(corr, smooth)
1481
1482    if visualize:
1483        plt.matshow(corr, vmin=-1, vmax=1)
1484        plt.set_cmap('RdBu')
1485        plt.colorbar()
1486        plt.draw()
1487
1488    if correlation is True:
1489        return corr
1490
1491    errors = [o.dvalue for o in obs]
1492    cov = np.diag(errors) @ corr @ np.diag(errors)
1493
1494    eigenvalues = np.linalg.eigh(cov)[0]
1495    if not np.all(eigenvalues >= 0):
1496        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1497
1498    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None): View Source

1578def import_jackknife(jacks, name, idl=None):
1579    """Imports jackknife samples and returns an Obs
1580
1581    Parameters
1582    ----------
1583    jacks : numpy.ndarray
1584        numpy array containing the mean value as zeroth entry and
1585        the N jackknife samples as first to Nth entry.
1586    name : str
1587        name of the ensemble the samples are defined on.
1588    """
1589    length = len(jacks) - 1
1590    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1591    samples = jacks[1:] @ prj
1592    mean = np.mean(samples)
1593    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1594    new_obs._value = jacks[0]
1595    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.

def merge_obs(list_of_obs): View Source

1598def merge_obs(list_of_obs):
1599    """Combine all observables in list_of_obs into one new observable
1600
1601    Parameters
1602    ----------
1603    list_of_obs : list
1604        list of the Obs object to be combined
1605
1606    Notes
1607    -----
1608    It is not possible to combine obs which are based on the same replicum
1609    """
1610    replist = [item for obs in list_of_obs for item in obs.names]
1611    if (len(replist) == len(set(replist))) is False:
1612        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1613    if any([len(o.cov_names) for o in list_of_obs]):
1614        raise Exception('Not possible to merge data that contains covobs!')
1615    new_dict = {}
1616    idl_dict = {}
1617    for o in list_of_obs:
1618        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1619                        for key in set(o.deltas) | set(o.r_values)})
1620        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1621
1622    names = sorted(new_dict.keys())
1623    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1624    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1625    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1626    return o

Combine all observables in list_of_obs into one new observable

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

1629def cov_Obs(means, cov, name, grad=None):
1630    """Create an Obs based on mean(s) and a covariance matrix
1631
1632    Parameters
1633    ----------
1634    mean : list of floats or float
1635        N mean value(s) of the new Obs
1636    cov : list or array
1637        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1638    name : str
1639        identifier for the covariance matrix
1640    grad : list or array
1641        Gradient of the Covobs wrt. the means belonging to cov.
1642    """
1643
1644    def covobs_to_obs(co):
1645        """Make an Obs out of a Covobs
1646
1647        Parameters
1648        ----------
1649        co : Covobs
1650            Covobs to be embedded into the Obs
1651        """
1652        o = Obs([], [], means=[])
1653        o._value = co.value
1654        o.names.append(co.name)
1655        o._covobs[co.name] = co
1656        o._dvalue = np.sqrt(co.errsq())
1657        return o
1658
1659    ol = []
1660    if isinstance(means, (float, int)):
1661        means = [means]
1662
1663    for i in range(len(means)):
1664        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1665    if ol[0].covobs[name].N != len(means):
1666        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1667    if len(ol) == 1:
1668        return ol[0]
1669    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

mean (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.