pyerrors.fits

  1import gc
  2from collections.abc import Sequence
  3import warnings
  4import numpy as np
  5import autograd.numpy as anp
  6import scipy.optimize
  7import scipy.stats
  8import matplotlib.pyplot as plt
  9from matplotlib import gridspec
 10from scipy.odr import ODR, Model, RealData
 11import iminuit
 12from autograd import jacobian as auto_jacobian
 13from autograd import hessian as auto_hessian
 14from autograd import elementwise_grad as egrad
 15from numdifftools import Jacobian as num_jacobian
 16from numdifftools import Hessian as num_hessian
 17from .obs import Obs, derived_observable, covariance, cov_Obs
 18
 19
 20class Fit_result(Sequence):
 21    """Represents fit results.
 22
 23    Attributes
 24    ----------
 25    fit_parameters : list
 26        results for the individual fit parameters,
 27        also accessible via indices.
 28    """
 29
 30    def __init__(self):
 31        self.fit_parameters = None
 32
 33    def __getitem__(self, idx):
 34        return self.fit_parameters[idx]
 35
 36    def __len__(self):
 37        return len(self.fit_parameters)
 38
 39    def gamma_method(self, **kwargs):
 40        """Apply the gamma method to all fit parameters"""
 41        [o.gamma_method(**kwargs) for o in self.fit_parameters]
 42
 43    def __str__(self):
 44        my_str = 'Goodness of fit:\n'
 45        if hasattr(self, 'chisquare_by_dof'):
 46            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
 47        elif hasattr(self, 'residual_variance'):
 48            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
 49        if hasattr(self, 'chisquare_by_expected_chisquare'):
 50            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
 51        if hasattr(self, 'p_value'):
 52            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
 53        my_str += 'Fit parameters:\n'
 54        for i_par, par in enumerate(self.fit_parameters):
 55            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
 56        return my_str
 57
 58    def __repr__(self):
 59        m = max(map(len, list(self.__dict__.keys()))) + 1
 60        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])
 61
 62
 63def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 64    r'''Performs a non-linear fit to y = func(x).
 65
 66    Parameters
 67    ----------
 68    x : list
 69        list of floats.
 70    y : list
 71        list of Obs.
 72    func : object
 73        fit function, has to be of the form
 74
 75        ```python
 76        import autograd.numpy as anp
 77
 78        def func(a, x):
 79            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 80        ```
 81
 82        For multiple x values func can be of the form
 83
 84        ```python
 85        def func(a, x):
 86            (x1, x2) = x
 87            return a[0] * x1 ** 2 + a[1] * x2
 88        ```
 89
 90        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
 91        will not work.
 92    priors : list, optional
 93        priors has to be a list with an entry for every parameter in the fit. The entries can either be
 94        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
 95        0.548(23), 500(40) or 0.5(0.4)
 96    silent : bool, optional
 97        If true all output to the console is omitted (default False).
 98    initial_guess : list
 99        can provide an initial guess for the input parameters. Relevant for
100        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
101        an uncorrelated fit which then serves as guess for the correlated fit.
102    method : str, optional
103        can be used to choose an alternative method for the minimization of chisquare.
104        The possible methods are the ones which can be used for scipy.optimize.minimize and
105        migrad of iminuit. If no method is specified, Levenberg-Marquard is used.
106        migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
107    correlated_fit : bool
108        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
109        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
110        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
111        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
112        At the moment this option only works for `prior==None` and when no `method` is given.
113    expected_chisquare : bool
114        If True estimates the expected chisquare which is
115        corrected by effects caused by correlated input data (default False).
116    resplot : bool
117        If True, a plot which displays fit, data and residuals is generated (default False).
118    qqplot : bool
119        If True, a quantile-quantile plot of the fit result is generated (default False).
120    num_grad : bool
121        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
122    '''
123    if priors is not None:
124        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
125    else:
126        return _standard_fit(x, y, func, silent=silent, **kwargs)
127
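A minimal usage sketch (not part of the module source): the data are synthetic, the ensemble label `ens1` and all numbers are placeholders, and `pyerrors` is assumed to be importable as `pe`.

```python
import numpy as np
import autograd.numpy as anp  # the fit function has to use autograd.numpy
import pyerrors as pe


def func(a, x):
    return a[0] + a[1] * anp.exp(-a[2] * x)


rng = np.random.default_rng(7)
x = np.arange(1, 9)
# one Obs per data point, built from 500 fictitious Monte Carlo samples
y = [pe.Obs([rng.normal(0.5 + 2.0 * np.exp(-0.3 * xi), 0.1, 500)], ['ens1']) for xi in x]
[o.gamma_method() for o in y]  # y errors are needed before fitting

fit = pe.fits.least_squares(x, y, func)
fit.gamma_method()  # propagate the errors to the fit parameters
print(fit)          # goodness of fit and parameter values
a0 = fit[0]         # Fit_result supports indexing into fit_parameters
```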
128
129def total_least_squares(x, y, func, silent=False, **kwargs):
130    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
131
132    Parameters
133    ----------
134    x : list
135        list of Obs, or a tuple of lists of Obs
136    y : list
137        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
138    func : object
139        func has to be of the form
140
141        ```python
142        import autograd.numpy as anp
143
144        def func(a, x):
145            return a[0] + a[1] * x + a[2] * anp.sinh(x)
146        ```
147
148        For multiple x values func can be of the form
149
150        ```python
151        def func(a, x):
152            (x1, x2) = x
153            return a[0] * x1 ** 2 + a[1] * x2
154        ```
155
156        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
157        will not work.
158    silent : bool, optional
159        If true all output to the console is omitted (default False).
160    initial_guess : list
161        can provide an initial guess for the input parameters. Relevant for non-linear
162        fits with many parameters.
163    expected_chisquare : bool
164        If true prints the expected chisquare which is
165        corrected by effects caused by correlated input data.
166        This can take a while as the full correlation matrix
167        has to be calculated (default False).
168    num_grad : bool
169        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
170
171    Notes
172    -----
173    Based on the orthogonal distance regression module of scipy
174    '''
175
176    output = Fit_result()
177
178    output.fit_function = func
179
180    x = np.array(x)
181
182    x_shape = x.shape
183
184    if kwargs.get('num_grad') is True:
185        jacobian = num_jacobian
186        hessian = num_hessian
187    else:
188        jacobian = auto_jacobian
189        hessian = auto_hessian
190
191    if not callable(func):
192        raise TypeError('func has to be a function.')
193
194    for i in range(42):
195        try:
196            func(np.arange(i), x.T[0])
197        except TypeError:
198            continue
199        except IndexError:
200            continue
201        else:
202            break
203    else:
204        raise RuntimeError("Fit function is not valid.")
205
206    n_parms = i
207    if not silent:
208        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
209
210    x_f = np.vectorize(lambda o: o.value)(x)
211    dx_f = np.vectorize(lambda o: o.dvalue)(x)
212    y_f = np.array([o.value for o in y])
213    dy_f = np.array([o.dvalue for o in y])
214
215    if np.any(np.asarray(dx_f) <= 0.0):
216        raise Exception('No x errors available, run the gamma method first.')
217
218    if np.any(np.asarray(dy_f) <= 0.0):
219        raise Exception('No y errors available, run the gamma method first.')
220
221    if 'initial_guess' in kwargs:
222        x0 = kwargs.get('initial_guess')
223        if len(x0) != n_parms:
224            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
225    else:
226        x0 = [1] * n_parms
227
228    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
229    model = Model(func)
230    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
231    odr.set_job(fit_type=0, deriv=1)
232    out = odr.run()
233
234    output.residual_variance = out.res_var
235
236    output.method = 'ODR'
237
238    output.message = out.stopreason
239
240    output.xplus = out.xplus
241
242    if not silent:
243        print('Method: ODR')
244        print(*out.stopreason)
245        print('Residual variance:', output.residual_variance)
246
247    if out.info > 3:
248        raise Exception('The minimization procedure did not converge.')
249
250    m = x_f.size
251
252    def odr_chisquare(p):
253        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
254        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
255        return chisq
256
257    if kwargs.get('expected_chisquare') is True:
258        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
259
260        if kwargs.get('covariance') is not None:
261            cov = kwargs.get('covariance')
262        else:
263            cov = covariance(np.concatenate((y, x.ravel())))
264
265        number_of_x_parameters = int(m / x_f.shape[-1])
266
267        old_jac = jacobian(func)(out.beta, out.xplus)
268        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
269        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
270        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
271
272        A = W @ new_jac
273        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
274        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
275        if expected_chisquare <= 0.0:
276            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
277            expected_chisquare = np.abs(expected_chisquare)
278        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
279        if not silent:
280            print('chisquare/expected_chisquare:',
281                  output.chisquare_by_expected_chisquare)
282
283    fitp = out.beta
284    try:
285        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
286    except TypeError:
287        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
288
289    def odr_chisquare_compact_x(d):
290        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
291        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
292        return chisq
293
294    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
295
296    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
297    try:
298        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
299    except np.linalg.LinAlgError:
300        raise Exception("Cannot invert hessian matrix.")
301
302    def odr_chisquare_compact_y(d):
303        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
304        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
305        return chisq
306
307    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))
308
309    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
310    try:
311        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
312    except np.linalg.LinAlgError:
313        raise Exception("Cannot invert hessian matrix.")
314
315    result = []
316    for i in range(n_parms):
317        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
318
319    output.fit_parameters = result
320
321    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
322    output.dof = x.shape[-1] - n_parms
323    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)
324
325    return output
326
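A hedged sketch for the ODR fit, again with synthetic placeholder data: here the x values carry errors as well, so both x and y are lists of Obs.

```python
import numpy as np
import pyerrors as pe


def lin(a, x):
    return a[0] + a[1] * x


rng = np.random.default_rng(11)
x_centers = np.arange(1., 6.)
x = [pe.Obs([rng.normal(xc, 0.05, 500)], ['ens1']) for xc in x_centers]
y = [pe.Obs([rng.normal(0.5 + 2.0 * xc, 0.1, 500)], ['ens1']) for xc in x_centers]
for o in x + y:
    o.gamma_method()  # both x and y errors are required

odr_fit = pe.fits.total_least_squares(x, y, lin)
odr_fit.gamma_method()
print(odr_fit)
```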
327
328def _prior_fit(x, y, func, priors, silent=False, **kwargs):
329    output = Fit_result()
330
331    output.fit_function = func
332
333    x = np.asarray(x)
334
335    if kwargs.get('num_grad') is True:
336        hessian = num_hessian
337    else:
338        hessian = auto_hessian
339
340    if not callable(func):
341        raise TypeError('func has to be a function.')
342
343    for i in range(100):
344        try:
345            func(np.arange(i), 0)
346        except TypeError:
347            continue
348        except IndexError:
349            continue
350        else:
351            break
352    else:
353        raise RuntimeError("Fit function is not valid.")
354
355    n_parms = i
356
357    if n_parms != len(priors):
358        raise Exception('Priors do not have the correct length.')
359
360    def extract_val_and_dval(string):
361        split_string = string.split('(')
362        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
363            factor = 10 ** -len(split_string[0].partition('.')[2])
364        else:
365            factor = 1
366        return float(split_string[0]), float(split_string[1][:-1]) * factor
367
368    loc_priors = []
369    for i_n, i_prior in enumerate(priors):
370        if isinstance(i_prior, Obs):
371            loc_priors.append(i_prior)
372        else:
373            loc_val, loc_dval = extract_val_and_dval(i_prior)
374            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
375
376    output.priors = loc_priors
377
378    if not silent:
379        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
380
381    y_f = [o.value for o in y]
382    dy_f = [o.dvalue for o in y]
383
384    if np.any(np.asarray(dy_f) <= 0.0):
385        raise Exception('No y errors available, run the gamma method first.')
386
387    p_f = [o.value for o in loc_priors]
388    dp_f = [o.dvalue for o in loc_priors]
389
390    if np.any(np.asarray(dp_f) <= 0.0):
391        raise Exception('No prior errors available, run the gamma method first.')
392
393    if 'initial_guess' in kwargs:
394        x0 = kwargs.get('initial_guess')
395        if len(x0) != n_parms:
396            raise Exception('Initial guess does not have the correct length.')
397    else:
398        x0 = p_f
399
400    def chisqfunc(p):
401        model = func(p, x)
402        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
403        return chisq
404
405    if not silent:
406        print('Method: migrad')
407
408    m = iminuit.Minuit(chisqfunc, x0)
409    m.errordef = 1
410    m.print_level = 0
411    if 'tol' in kwargs:
412        m.tol = kwargs.get('tol')
413    else:
414        m.tol = 1e-4
415    m.migrad()
416    params = np.asarray(m.values)
417
418    output.chisquare_by_dof = m.fval / len(x)
419
420    output.method = 'migrad'
421
422    if not silent:
423        print('chisquare/d.o.f.:', output.chisquare_by_dof)
424
425    if not m.fmin.is_valid:
426        raise Exception('The minimization procedure did not converge.')
427
428    hess = hessian(chisqfunc)(params)
429    hess_inv = np.linalg.pinv(hess)
430
431    def chisqfunc_compact(d):
432        model = func(d[:n_parms], x)
433        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
434        return chisq
435
436    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
437
438    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
439
440    result = []
441    for i in range(n_parms):
442        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
443
444    output.fit_parameters = result
445    output.chisquare = chisqfunc(np.asarray(params))
446
447    if kwargs.get('resplot') is True:
448        residual_plot(x, y, func, result)
449
450    if kwargs.get('qqplot') is True:
451        qqplot(x, y, func, result)
452
453    return output
454
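`_prior_fit` is reached through `least_squares` whenever `priors` is given. A hedged sketch with synthetic placeholder data; the prior strings are purely illustrative.

```python
import numpy as np
import autograd.numpy as anp
import pyerrors as pe


def func(a, x):
    return a[0] + a[1] * anp.exp(-a[2] * x)


rng = np.random.default_rng(3)
x = np.arange(1, 9)
y = [pe.Obs([rng.normal(0.5 + 2.0 * np.exp(-0.3 * xi), 0.1, 500)], ['ens1']) for xi in x]
[o.gamma_method() for o in y]

# one prior per fit parameter: either strings like '0.50(5)' or Obs from earlier fits
priors = ['0.50(5)', '2.0(3)', '0.30(10)']
prior_fit = pe.fits.least_squares(x, y, func, priors=priors)
prior_fit.gamma_method()
print(prior_fit)
```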
455
456def _standard_fit(x, y, func, silent=False, **kwargs):
457
458    output = Fit_result()
459
460    output.fit_function = func
461
462    x = np.asarray(x)
463
464    if kwargs.get('num_grad') is True:
465        jacobian = num_jacobian
466        hessian = num_hessian
467    else:
468        jacobian = auto_jacobian
469        hessian = auto_hessian
470
471    if x.shape[-1] != len(y):
472        raise Exception('x and y input have to have the same length')
473
474    if len(x.shape) > 2:
475        raise Exception('Unknown format for x values')
476
477    if not callable(func):
478        raise TypeError('func has to be a function.')
479
480    for i in range(42):
481        try:
482            func(np.arange(i), x.T[0])
483        except TypeError:
484            continue
485        except IndexError:
486            continue
487        else:
488            break
489    else:
490        raise RuntimeError("Fit function is not valid.")
491
492    n_parms = i
493
494    if not silent:
495        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
496
497    y_f = [o.value for o in y]
498    dy_f = [o.dvalue for o in y]
499
500    if np.any(np.asarray(dy_f) <= 0.0):
501        raise Exception('No y errors available, run the gamma method first.')
502
503    if 'initial_guess' in kwargs:
504        x0 = kwargs.get('initial_guess')
505        if len(x0) != n_parms:
506            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
507    else:
508        x0 = [0.1] * n_parms
509
510    if kwargs.get('correlated_fit') is True:
511        corr = covariance(y, correlation=True, **kwargs)
512        covdiag = np.diag(1 / np.asarray(dy_f))
513        condn = np.linalg.cond(corr)
514        if condn > 0.1 / np.finfo(float).eps:
515            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
516        if condn > 1e13:
517            warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
518        chol = np.linalg.cholesky(corr)
519        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
520
521        def chisqfunc_corr(p):
522            model = func(p, x)
523            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
524            return chisq
525
526    def chisqfunc(p):
527        model = func(p, x)
528        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
529        return chisq
530
531    output.method = kwargs.get('method', 'Levenberg-Marquardt')
532    if not silent:
533        print('Method:', output.method)
534
535    if output.method != 'Levenberg-Marquardt':
536        if output.method == 'migrad':
537            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
538            if kwargs.get('correlated_fit') is True:
539                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
540            output.iterations = fit_result.nfev
541        else:
542            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
543            if kwargs.get('correlated_fit') is True:
544                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
545            output.iterations = fit_result.nit
546
547        chisquare = fit_result.fun
548
549    else:
550        if kwargs.get('correlated_fit') is True:
551            def chisqfunc_residuals_corr(p):
552                model = func(p, x)
553                chisq = anp.dot(chol_inv, (y_f - model))
554                return chisq
555
556        def chisqfunc_residuals(p):
557            model = func(p, x)
558            chisq = ((y_f - model) / dy_f)
559            return chisq
560
561        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
562        if kwargs.get('correlated_fit') is True:
563            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
564
565        chisquare = np.sum(fit_result.fun ** 2)
566        if kwargs.get('correlated_fit') is True:
567            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
568        else:
569            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
570
571        output.iterations = fit_result.nfev
572
573    if not fit_result.success:
574        raise Exception('The minimization procedure did not converge.')
575
576    if x.shape[-1] - n_parms > 0:
577        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
578    else:
579        output.chisquare_by_dof = float('nan')
580
581    output.message = fit_result.message
582    if not silent:
583        print(fit_result.message)
584        print('chisquare/d.o.f.:', output.chisquare_by_dof)
585
586    if kwargs.get('expected_chisquare') is True:
587        if kwargs.get('correlated_fit') is not True:
588            W = np.diag(1 / np.asarray(dy_f))
589            cov = covariance(y)
590            A = W @ jacobian(func)(fit_result.x, x)
591            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
592            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
593            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
594            if not silent:
595                print('chisquare/expected_chisquare:',
596                      output.chisquare_by_expected_chisquare)
597
598    fitp = fit_result.x
599    try:
600        if kwargs.get('correlated_fit') is True:
601            hess = hessian(chisqfunc_corr)(fitp)
602        else:
603            hess = hessian(chisqfunc)(fitp)
604    except TypeError:
605        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
606
607    if kwargs.get('correlated_fit') is True:
608        def chisqfunc_compact(d):
609            model = func(d[:n_parms], x)
610            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
611            return chisq
612
613    else:
614        def chisqfunc_compact(d):
615            model = func(d[:n_parms], x)
616            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
617            return chisq
618
619    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
620
621    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
622    try:
623        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
624    except np.linalg.LinAlgError:
625        raise Exception("Cannot invert hessian matrix.")
626
627    result = []
628    for i in range(n_parms):
629        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
630
631    output.fit_parameters = result
632
633    output.chisquare = chisquare
634    output.dof = x.shape[-1] - n_parms
635    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
636
637    if kwargs.get('resplot') is True:
638        residual_plot(x, y, func, result)
639
640    if kwargs.get('qqplot') is True:
641        qqplot(x, y, func, result)
642
643    return output
644
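`_standard_fit` handles the keyword arguments passed through `least_squares`. A hedged sketch of a correlated fit and alternative minimizers, reusing `x`, `y` and `func` from the `least_squares` sketch above.

```python
# x, y and func as defined in the least_squares sketch above

# correlated fit: the Cholesky-decomposed correlation matrix enters the chisquare
fit_corr = pe.fits.least_squares(x, y, func, correlated_fit=True)
fit_corr.gamma_method()

# expected chisquare corrected for correlations in the input data
fit_exp = pe.fits.least_squares(x, y, func, expected_chisquare=True)

# minimizers other than the default Levenberg-Marquardt
fit_nm = pe.fits.least_squares(x, y, func, method='Nelder-Mead')
fit_mg = pe.fits.least_squares(x, y, func, method='migrad')
```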
645
646def fit_lin(x, y, **kwargs):
647    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
648
649    Parameters
650    ----------
651    x : list
652        Can either be a list of floats in which case no xerror is assumed, or
653        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
654    y : list
655        List of Obs, the dvalues of the Obs are used as yerror for the fit.
656    """
657
658    def f(a, x):
659        y = a[0] + a[1] * x
660        return y
661
662    if all(isinstance(n, Obs) for n in x):
663        out = total_least_squares(x, y, f, **kwargs)
664        return out.fit_parameters
665    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
666        out = least_squares(x, y, f, **kwargs)
667        return out.fit_parameters
668    else:
669        raise Exception('Unsupported types for x')
670
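A hedged sketch for `fit_lin` with synthetic placeholder data: with plain floats as x it dispatches to `least_squares`, with Obs as x to `total_least_squares`.

```python
import numpy as np
import pyerrors as pe

rng = np.random.default_rng(5)
x = [1., 2., 3., 4.]
y = [pe.Obs([rng.normal(0.5 + 2.0 * xi, 0.05, 500)], ['ens1']) for xi in x]
[o.gamma_method() for o in y]

n, m = pe.fits.fit_lin(x, y)  # x are plain floats, so this is a least_squares fit
for par in (n, m):
    par.gamma_method()
print(n, m)
```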
671
672def qqplot(x, o_y, func, p):
673    """Generates a quantile-quantile plot of the fit result which can be used to
674       check if the residuals of the fit are gaussian distributed.
675    """
676
677    residuals = []
678    for i_x, i_y in zip(x, o_y):
679        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
680    residuals = sorted(residuals)
681    my_y = [o.value for o in residuals]
682    probplot = scipy.stats.probplot(my_y)
683    my_x = probplot[0][0]
684    plt.figure(figsize=(8, 8 / 1.618))
685    plt.errorbar(my_x, my_y, fmt='o')
686    fit_start = my_x[0]
687    fit_stop = my_x[-1]
688    samples = np.arange(fit_start, fit_stop, 0.01)
689    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
690    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
691
692    plt.xlabel('Theoretical quantiles')
693    plt.ylabel('Ordered Values')
694    plt.legend()
695    plt.draw()
696
697
698def residual_plot(x, y, func, fit_res):
699    """ Generates a plot which compares the fit to the data and displays the corresponding residuals"""
700    sorted_x = sorted(x)
701    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
702    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
703    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
704
705    plt.figure(figsize=(8, 8 / 1.618))
706    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
707    ax0 = plt.subplot(gs[0])
708    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
709    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
710    ax0.set_xticklabels([])
711    ax0.set_xlim([xstart, xstop])
712    ax0.set_xticklabels([])
713    ax0.legend()
714
715    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
716    ax1 = plt.subplot(gs[1])
717    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
718    ax1.tick_params(direction='out')
719    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
720    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
721    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
722    ax1.set_xlim([xstart, xstop])
723    ax1.set_ylabel('Residuals')
724    plt.subplots_adjust(wspace=None, hspace=None)
725    plt.draw()
726
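Both diagnostic plots can also be produced directly from a finished fit, equivalent to passing `qqplot=True` / `resplot=True` to `least_squares`. A hedged sketch with synthetic placeholder data:

```python
import numpy as np
import pyerrors as pe
import matplotlib.pyplot as plt


def lin(a, x):
    return a[0] + a[1] * x


rng = np.random.default_rng(2)
x = np.arange(1., 7.)
y = [pe.Obs([rng.normal(0.5 + 2.0 * xi, 0.05, 500)], ['ens1']) for xi in x]
[o.gamma_method() for o in y]
fit = pe.fits.least_squares(x, y, lin)

pe.fits.residual_plot(x, y, lin, fit.fit_parameters)
pe.fits.qqplot(x, y, lin, fit.fit_parameters)
plt.show()
```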
727
728def error_band(x, func, beta):
729    """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
730    cov = covariance(beta)
731    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
732        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
733
734    deriv = []
735    for i, item in enumerate(x):
736        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
737
738    err = []
739    for i, item in enumerate(x):
740        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
741    err = np.array(err)
742
743    return err
744
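A hedged sketch for `error_band` with synthetic placeholder data: it expects the sample points, the fit function and the optimized parameters as a list of Obs whose errors have already been determined via `gamma_method`.

```python
import numpy as np
import pyerrors as pe
import matplotlib.pyplot as plt


def lin(a, x):
    return a[0] + a[1] * x


rng = np.random.default_rng(4)
x = np.arange(1., 7.)
y = [pe.Obs([rng.normal(0.5 + 2.0 * xi, 0.05, 500)], ['ens1']) for xi in x]
[o.gamma_method() for o in y]
fit = pe.fits.least_squares(x, y, lin)
fit.gamma_method()  # needed so that covariance(beta) is available

x_band = np.linspace(1.0, 6.0, 50)
band = pe.fits.error_band(x_band, lin, fit.fit_parameters)
central = lin([p.value for p in fit.fit_parameters], x_band)
plt.fill_between(x_band, central - band, central + band, alpha=0.3)
plt.show()
```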
745
746def ks_test(objects=None):
747    """Performs a Kolmogorov–Smirnov test for the p-values of all fit object.
748
749    Parameters
750    ----------
751    objects : list
752        List of fit results to include in the analysis (optional).
753    """
754
755    if objects is None:
756        obs_list = []
757        for obj in gc.get_objects():
758            if isinstance(obj, Fit_result):
759                obs_list.append(obj)
760    else:
761        obs_list = objects
762
763    p_values = [o.p_value for o in obs_list]
764
765    bins = len(p_values)
766    x = np.arange(0, 1.001, 0.001)
767    plt.plot(x, x, 'k', zorder=1)
768    plt.xlim(0, 1)
769    plt.ylim(0, 1)
770    plt.xlabel('p-value')
771    plt.ylabel('Cumulative probability')
772    plt.title(str(bins) + ' p-values')
773
774    n = np.arange(1, bins + 1) / np.float64(bins)
775    Xs = np.sort(p_values)
776    plt.step(Xs, n)
777    diffs = n - Xs
778    loc_max_diff = np.argmax(np.abs(diffs))
779    loc = Xs[loc_max_diff]
780    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
781    plt.draw()
782
783    print(scipy.stats.kstest(p_values, 'uniform'))