pyerrors.fits

  1import gc
  2from collections.abc import Sequence
  3import warnings
  4import numpy as np
  5import autograd.numpy as anp
  6import scipy.optimize
  7import scipy.stats
  8import matplotlib.pyplot as plt
  9from matplotlib import gridspec
 10from scipy.odr import ODR, Model, RealData
 11import iminuit
 12from autograd import jacobian as auto_jacobian
 13from autograd import hessian as auto_hessian
 14from autograd import elementwise_grad as egrad
 15from numdifftools import Jacobian as num_jacobian
 16from numdifftools import Hessian as num_hessian
 17from .obs import Obs, derived_observable, covariance, cov_Obs
 18
 19
 20class Fit_result(Sequence):
 21    """Represents fit results.
 22
 23    Attributes
 24    ----------
 25    fit_parameters : list
 26        results for the individual fit parameters,
 27        also accessible via indices.
 28    chisquare_by_dof : float
 29        reduced chisquare.
 30    p_value : float
 31        p-value of the fit
 32    t2_p_value : float
 33        Hotelling t-squared p-value for correlated fits.
 34    """
 35
 36    def __init__(self):
 37        self.fit_parameters = None
 38
 39    def __getitem__(self, idx):
 40        return self.fit_parameters[idx]
 41
 42    def __len__(self):
 43        return len(self.fit_parameters)
 44
 45    def gamma_method(self, **kwargs):
 46        """Apply the gamma method to all fit parameters"""
 47        [o.gamma_method(**kwargs) for o in self.fit_parameters]
 48
 49    gm = gamma_method
 50
 51    def __str__(self):
 52        my_str = 'Goodness of fit:\n'
 53        if hasattr(self, 'chisquare_by_dof'):
 54            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
 55        elif hasattr(self, 'residual_variance'):
 56            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
 57        if hasattr(self, 'chisquare_by_expected_chisquare'):
 58            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
 59        if hasattr(self, 'p_value'):
 60            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
 61        if hasattr(self, 't2_p_value'):
 62            my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n'
 63        my_str += 'Fit parameters:\n'
 64        for i_par, par in enumerate(self.fit_parameters):
 65            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
 66        return my_str
 67
 68    def __repr__(self):
 69        m = max(map(len, list(self.__dict__.keys()))) + 1
 70        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])
 71
 72
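A minimal usage sketch of the interface described above (illustrative only; `pe` is assumed to be the `pyerrors` import alias and `x`, `y`, `func` suitable fit inputs):

```python
fit_res = pe.fits.least_squares(x, y, func)  # returns a Fit_result
fit_res.gm()                                 # apply the gamma method to all fit parameters
print(fit_res)                               # goodness-of-fit summary and parameter table
first_parameter = fit_res[0]                 # fit parameters are accessible via indices
```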
 73def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 74    r'''Performs a non-linear fit to y = func(x).
 75
 76    Parameters
 77    ----------
 78    x : list
 79        list of floats.
 80    y : list
 81        list of Obs.
 82    func : object
 83        fit function, has to be of the form
 84
 85        ```python
 86        import autograd.numpy as anp
 87
 88        def func(a, x):
 89            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 90        ```
 91
 92        For multiple x values func can be of the form
 93
 94        ```python
 95        def func(a, x):
 96            (x1, x2) = x
 97            return a[0] * x1 ** 2 + a[1] * x2
 98        ```
 99
100        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
101        will not work.
102    priors : list, optional
103        priors has to be a list with an entry for every parameter in the fit. The entries can either be
104        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
105        0.548(23), 500(40) or 0.5(0.4)
106    silent : bool, optional
107        If true all output to the console is omitted (default False).
108    initial_guess : list
109        can provide an initial guess for the input parameters. Relevant for
110        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
111        an uncorrelated fit which then serves as guess for the correlated fit.
112    method : str, optional
113        can be used to choose an alternative method for the minimization of chisquare.
114        The possible methods are the ones which can be used for scipy.optimize.minimize and
115        migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
116        Reliable alternatives are migrad, Powell and Nelder-Mead.
117    correlated_fit : bool
118        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
119        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
120        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
121        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
122        At the moment this option only works for `priors=None` and when no `method` is given.
123    expected_chisquare : bool
124        If True estimates the expected chisquare which is
125        corrected by effects caused by correlated input data (default False).
126    resplot : bool
127        If True, a plot which displays fit, data and residuals is generated (default False).
128    qqplot : bool
129        If True, a quantile-quantile plot of the fit result is generated (default False).
130    num_grad : bool
131        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
132
133    Returns
134    -------
135    output : Fit_result
136        Parameters and information on the fitted result.
137    '''
138    if priors is not None:
139        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
140    else:
141        return _standard_fit(x, y, func, silent=silent, **kwargs)
142
143
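An illustrative call sequence (a sketch under assumptions: `pe` is the `pyerrors` import alias, and `x` (floats) and `y` (Obs with errors estimated via the gamma method) already exist):

```python
import autograd.numpy as anp
import pyerrors as pe

def func(a, x):
    return a[0] * anp.exp(-a[1] * x)

fit_res = pe.fits.least_squares(x, y, func, initial_guess=[1.0, 0.1])
fit_res.gm()
print(fit_res)

# correlated fit using the Cholesky-decomposed correlation matrix
corr_res = pe.fits.least_squares(x, y, func, correlated_fit=True)

# fit with priors, one entry per parameter (Obs or strings such as '1.0(5)')
prior_res = pe.fits.least_squares(x, y, func, priors=['1.0(5)', '0.10(5)'])
```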
144def total_least_squares(x, y, func, silent=False, **kwargs):
145    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
146
147    Parameters
148    ----------
149    x : list
150        list of Obs, or a tuple of lists of Obs
151    y : list
152        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
153    func : object
154        func has to be of the form
155
156        ```python
157        import autograd.numpy as anp
158
159        def func(a, x):
160            return a[0] + a[1] * x + a[2] * anp.sinh(x)
161        ```
162
163        For multiple x values func can be of the form
164
165        ```python
166        def func(a, x):
167            (x1, x2) = x
168            return a[0] * x1 ** 2 + a[1] * x2
169        ```
170
171        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
172        will not work.
173    silent : bool, optional
174        If true all output to the console is omitted (default False).
175    initial_guess : list
176        can provide an initial guess for the input parameters. Relevant for non-linear
177        fits with many parameters.
178    expected_chisquare : bool
179        If true prints the expected chisquare which is
180        corrected by effects caused by correlated input data.
181        This can take a while as the full correlation matrix
182        has to be calculated (default False).
183    num_grad : bool
184        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
185
186    Notes
187    -----
188    Based on the orthogonal distance regression module of scipy.
189
190    Returns
191    -------
192    output : Fit_result
193        Parameters and information on the fitted result.
194    '''
195
196    output = Fit_result()
197
198    output.fit_function = func
199
200    x = np.array(x)
201
202    x_shape = x.shape
203
204    if kwargs.get('num_grad') is True:
205        jacobian = num_jacobian
206        hessian = num_hessian
207    else:
208        jacobian = auto_jacobian
209        hessian = auto_hessian
210
211    if not callable(func):
212        raise TypeError('func has to be a function.')
213
214    for i in range(42):
215        try:
216            func(np.arange(i), x.T[0])
217        except TypeError:
218            continue
219        except IndexError:
220            continue
221        else:
222            break
223    else:
224        raise RuntimeError("Fit function is not valid.")
225
226    n_parms = i
227    if not silent:
228        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
229
230    x_f = np.vectorize(lambda o: o.value)(x)
231    dx_f = np.vectorize(lambda o: o.dvalue)(x)
232    y_f = np.array([o.value for o in y])
233    dy_f = np.array([o.dvalue for o in y])
234
235    if np.any(np.asarray(dx_f) <= 0.0):
236        raise Exception('No x errors available, run the gamma method first.')
237
238    if np.any(np.asarray(dy_f) <= 0.0):
239        raise Exception('No y errors available, run the gamma method first.')
240
241    if 'initial_guess' in kwargs:
242        x0 = kwargs.get('initial_guess')
243        if len(x0) != n_parms:
244            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
245    else:
246        x0 = [1] * n_parms
247
248    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
249    model = Model(func)
250    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
251    odr.set_job(fit_type=0, deriv=1)
252    out = odr.run()
253
254    output.residual_variance = out.res_var
255
256    output.method = 'ODR'
257
258    output.message = out.stopreason
259
260    output.xplus = out.xplus
261
262    if not silent:
263        print('Method: ODR')
264        print(*out.stopreason)
265        print('Residual variance:', output.residual_variance)
266
267    if out.info > 3:
268        raise Exception('The minimization procedure did not converge.')
269
270    m = x_f.size
271
272    def odr_chisquare(p):
273        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
274        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
275        return chisq
276
277    if kwargs.get('expected_chisquare') is True:
278        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
279
280        if kwargs.get('covariance') is not None:
281            cov = kwargs.get('covariance')
282        else:
283            cov = covariance(np.concatenate((y, x.ravel())))
284
285        number_of_x_parameters = int(m / x_f.shape[-1])
286
287        old_jac = jacobian(func)(out.beta, out.xplus)
288        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
289        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
290        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
291
292        A = W @ new_jac
293        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
294        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
295        if expected_chisquare <= 0.0:
296            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
297            expected_chisquare = np.abs(expected_chisquare)
298        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
299        if not silent:
300            print('chisquare/expected_chisquare:',
301                  output.chisquare_by_expected_chisquare)
302
303    fitp = out.beta
304    try:
305        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
306    except TypeError:
307        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
308
309    def odr_chisquare_compact_x(d):
310        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
311        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
312        return chisq
313
314    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
315
316    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
317    try:
318        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
319    except np.linalg.LinAlgError:
320        raise Exception("Cannot invert hessian matrix.")
321
322    def odr_chisquare_compact_y(d):
323        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
324        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
325        return chisq
326
327    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))
328
329    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
330    try:
331        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
332    except np.linalg.LinAlgError:
333        raise Exception("Cannot invert hessian matrix.")
334
335    result = []
336    for i in range(n_parms):
337        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
338
339    output.fit_parameters = result
340
341    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
342    output.dof = x.shape[-1] - n_parms
343    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)
344
345    return output
346
347
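A sketch of a fit with errors on both axes (assuming `ox` and `oy` are lists of `Obs` whose errors have been estimated with the gamma method and `func` is autograd-compatible):

```python
out = pe.fits.total_least_squares(ox, oy, func)
out.gm()
print(out)       # reports the residual variance instead of chisquare/d.o.f.
slope = out[1]
```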
348def _prior_fit(x, y, func, priors, silent=False, **kwargs):
349    output = Fit_result()
350
351    output.fit_function = func
352
353    x = np.asarray(x)
354
355    if kwargs.get('num_grad') is True:
356        hessian = num_hessian
357    else:
358        hessian = auto_hessian
359
360    if not callable(func):
361        raise TypeError('func has to be a function.')
362
363    for i in range(100):
364        try:
365            func(np.arange(i), 0)
366        except TypeError:
367            continue
368        except IndexError:
369            continue
370        else:
371            break
372    else:
373        raise RuntimeError("Fit function is not valid.")
374
375    n_parms = i
376
377    if n_parms != len(priors):
378        raise Exception('Priors does not have the correct length.')
379
380    def extract_val_and_dval(string):
381        split_string = string.split('(')
382        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
383            factor = 10 ** -len(split_string[0].partition('.')[2])
384        else:
385            factor = 1
386        return float(split_string[0]), float(split_string[1][:-1]) * factor
387
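    # e.g. extract_val_and_dval('0.548(23)') -> (0.548, 0.023)
    #      extract_val_and_dval('500(40)')   -> (500.0, 40.0)
    #      extract_val_and_dval('0.5(0.4)')  -> (0.5, 0.4)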
388    loc_priors = []
389    for i_n, i_prior in enumerate(priors):
390        if isinstance(i_prior, Obs):
391            loc_priors.append(i_prior)
392        else:
393            loc_val, loc_dval = extract_val_and_dval(i_prior)
394            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
395
396    output.priors = loc_priors
397
398    if not silent:
399        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
400
401    y_f = [o.value for o in y]
402    dy_f = [o.dvalue for o in y]
403
404    if np.any(np.asarray(dy_f) <= 0.0):
405        raise Exception('No y errors available, run the gamma method first.')
406
407    p_f = [o.value for o in loc_priors]
408    dp_f = [o.dvalue for o in loc_priors]
409
410    if np.any(np.asarray(dp_f) <= 0.0):
411        raise Exception('No prior errors available, run the gamma method first.')
412
413    if 'initial_guess' in kwargs:
414        x0 = kwargs.get('initial_guess')
415        if len(x0) != n_parms:
416            raise Exception('Initial guess does not have the correct length.')
417    else:
418        x0 = p_f
419
420    def chisqfunc(p):
421        model = func(p, x)
422        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
423        return chisq
424
425    if not silent:
426        print('Method: migrad')
427
428    m = iminuit.Minuit(chisqfunc, x0)
429    m.errordef = 1
430    m.print_level = 0
431    if 'tol' in kwargs:
432        m.tol = kwargs.get('tol')
433    else:
434        m.tol = 1e-4
435    m.migrad()
436    params = np.asarray(m.values)
437
438    output.chisquare_by_dof = m.fval / len(x)
439
440    output.method = 'migrad'
441
442    if not silent:
443        print('chisquare/d.o.f.:', output.chisquare_by_dof)
444
445    if not m.fmin.is_valid:
446        raise Exception('The minimization procedure did not converge.')
447
448    hess = hessian(chisqfunc)(params)
449    hess_inv = np.linalg.pinv(hess)
450
451    def chisqfunc_compact(d):
452        model = func(d[:n_parms], x)
453        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
454        return chisq
455
456    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
457
458    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
459
460    result = []
461    for i in range(n_parms):
462        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
463
464    output.fit_parameters = result
465    output.chisquare = chisqfunc(np.asarray(params))
466
467    if kwargs.get('resplot') is True:
468        residual_plot(x, y, func, result)
469
470    if kwargs.get('qqplot') is True:
471        qqplot(x, y, func, result)
472
473    return output
474
475
476def _standard_fit(x, y, func, silent=False, **kwargs):
477
478    output = Fit_result()
479
480    output.fit_function = func
481
482    x = np.asarray(x)
483
484    if kwargs.get('num_grad') is True:
485        jacobian = num_jacobian
486        hessian = num_hessian
487    else:
488        jacobian = auto_jacobian
489        hessian = auto_hessian
490
491    if x.shape[-1] != len(y):
492        raise Exception('x and y input have to have the same length')
493
494    if len(x.shape) > 2:
495        raise Exception('Unknown format for x values')
496
497    if not callable(func):
498        raise TypeError('func has to be a function.')
499
500    for i in range(42):
501        try:
502            func(np.arange(i), x.T[0])
503        except TypeError:
504            continue
505        except IndexError:
506            continue
507        else:
508            break
509    else:
510        raise RuntimeError("Fit function is not valid.")
511
512    n_parms = i
513
514    if not silent:
515        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
516
517    y_f = [o.value for o in y]
518    dy_f = [o.dvalue for o in y]
519
520    if np.any(np.asarray(dy_f) <= 0.0):
521        raise Exception('No y errors available, run the gamma method first.')
522
523    if 'initial_guess' in kwargs:
524        x0 = kwargs.get('initial_guess')
525        if len(x0) != n_parms:
526            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
527    else:
528        x0 = [0.1] * n_parms
529
530    if kwargs.get('correlated_fit') is True:
531        corr = covariance(y, correlation=True, **kwargs)
532        covdiag = np.diag(1 / np.asarray(dy_f))
533        condn = np.linalg.cond(corr)
534        if condn > 0.1 / np.finfo(float).eps:
535            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
536        if condn > 1e13:
537            warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
538        chol = np.linalg.cholesky(corr)
539        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
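        # chol_inv = L^{-1} @ diag(1/dy_f) with corr = L @ L.T, so the correlated
        # chisquare below equals (y_f - model)^T Cov^{-1} (y_f - model): the residuals
        # are whitened with the inverse Cholesky factor of the correlation matrix
        # instead of inverting the covariance matrix directly.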
540
541        def chisqfunc_corr(p):
542            model = func(p, x)
543            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
544            return chisq
545
546    def chisqfunc(p):
547        model = func(p, x)
548        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
549        return chisq
550
551    output.method = kwargs.get('method', 'Levenberg-Marquardt')
552    if not silent:
553        print('Method:', output.method)
554
555    if output.method != 'Levenberg-Marquardt':
556        if output.method == 'migrad':
557            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
558            if kwargs.get('correlated_fit') is True:
559                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
560            output.iterations = fit_result.nfev
561        else:
562            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
563            if kwargs.get('correlated_fit') is True:
564                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
565            output.iterations = fit_result.nit
566
567        chisquare = fit_result.fun
568
569    else:
570        if kwargs.get('correlated_fit') is True:
571            def chisqfunc_residuals_corr(p):
572                model = func(p, x)
573                chisq = anp.dot(chol_inv, (y_f - model))
574                return chisq
575
576        def chisqfunc_residuals(p):
577            model = func(p, x)
578            chisq = ((y_f - model) / dy_f)
579            return chisq
580
581        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
582        if kwargs.get('correlated_fit') is True:
583            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
584
585        chisquare = np.sum(fit_result.fun ** 2)
586        if kwargs.get('correlated_fit') is True:
587            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
588        else:
589            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
590
591        output.iterations = fit_result.nfev
592
593    if not fit_result.success:
594        raise Exception('The minimization procedure did not converge.')
595
596    if x.shape[-1] - n_parms > 0:
597        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
598    else:
599        output.chisquare_by_dof = float('nan')
600
601    output.message = fit_result.message
602    if not silent:
603        print(fit_result.message)
604        print('chisquare/d.o.f.:', output.chisquare_by_dof)
605
606    if kwargs.get('expected_chisquare') is True:
607        if kwargs.get('correlated_fit') is not True:
608            W = np.diag(1 / np.asarray(dy_f))
609            cov = covariance(y)
610            A = W @ jacobian(func)(fit_result.x, x)
611            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
612            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
613            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
614            if not silent:
615                print('chisquare/expected_chisquare:',
616                      output.chisquare_by_expected_chisquare)
617
618    fitp = fit_result.x
619    try:
620        if kwargs.get('correlated_fit') is True:
621            hess = hessian(chisqfunc_corr)(fitp)
622        else:
623            hess = hessian(chisqfunc)(fitp)
624    except TypeError:
625        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
626
627    if kwargs.get('correlated_fit') is True:
628        def chisqfunc_compact(d):
629            model = func(d[:n_parms], x)
630            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
631            return chisq
632
633    else:
634        def chisqfunc_compact(d):
635            model = func(d[:n_parms], x)
636            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
637            return chisq
638
639    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
640
641    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
642    try:
643        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
644    except np.linalg.LinAlgError:
645        raise Exception("Cannot invert hessian matrix.")
646
647    result = []
648    for i in range(n_parms):
649        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
650
651    output.fit_parameters = result
652
653    output.chisquare = chisquare
654    output.dof = x.shape[-1] - n_parms
655    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
656    # Hotelling t-squared p-value for correlated fits.
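    # With d = dof and n = smallest number of samples among the y Obs:
    # t2_p_value = 1 - F_{d, n-d}((n - d) / (d * (n - 1)) * chisquare)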
657    if kwargs.get('correlated_fit') is True:
658        n_cov = np.min(np.vectorize(lambda x: x.N)(y))
659        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
660                                                  output.dof, n_cov - output.dof)
661
662    if kwargs.get('resplot') is True:
663        residual_plot(x, y, func, result)
664
665    if kwargs.get('qqplot') is True:
666        qqplot(x, y, func, result)
667
668    return output
669
670
671def fit_lin(x, y, **kwargs):
672    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
673
674    Parameters
675    ----------
676    x : list
677        Can either be a list of floats in which case no xerror is assumed, or
678        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
679    y : list
680        List of Obs, the dvalues of the Obs are used as yerror for the fit.
681
682    Returns
683    -------
684    fit_parameters : list[Obs]
685        List of fitted observables.
686    """
687
688    def f(a, x):
689        y = a[0] + a[1] * x
690        return y
691
692    if all(isinstance(n, Obs) for n in x):
693        out = total_least_squares(x, y, f, **kwargs)
694        return out.fit_parameters
695    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
696        out = least_squares(x, y, f, **kwargs)
697        return out.fit_parameters
698    else:
699        raise Exception('Unsupported types for x')
700
701
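A brief sketch (assuming `oy` is a list of four `Obs` with errors from the gamma method and `ox` a matching list of `Obs` x-values):

```python
n, m = pe.fits.fit_lin([1, 2, 3, 4], oy)   # x as floats -> least_squares
n, m = pe.fits.fit_lin(ox, oy)             # x as Obs    -> total_least_squares
```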
702def qqplot(x, o_y, func, p):
703    """Generates a quantile-quantile plot of the fit result which can be used to
704       check if the residuals of the fit are gaussian distributed.
705       check if the residuals of the fit are Gaussian distributed.
706    Returns
707    -------
708    None
709    """
710
711    residuals = []
712    for i_x, i_y in zip(x, o_y):
713        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
714    residuals = sorted(residuals)
715    my_y = [o.value for o in residuals]
716    probplot = scipy.stats.probplot(my_y)
717    my_x = probplot[0][0]
718    plt.figure(figsize=(8, 8 / 1.618))
719    plt.errorbar(my_x, my_y, fmt='o')
720    fit_start = my_x[0]
721    fit_stop = my_x[-1]
722    samples = np.arange(fit_start, fit_stop, 0.01)
723    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
724    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
725
726    plt.xlabel('Theoretical quantiles')
727    plt.ylabel('Ordered Values')
728    plt.legend()
729    plt.draw()
730
731
732def residual_plot(x, y, func, fit_res):
733    """Generates a plot which compares the fit to the data and displays the corresponding residuals
734
735    Returns
736    -------
737    None
738    """
739    sorted_x = sorted(x)
740    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
741    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
742    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
743
744    plt.figure(figsize=(8, 8 / 1.618))
745    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
746    ax0 = plt.subplot(gs[0])
747    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
748    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
749    ax0.set_xticklabels([])
750    ax0.set_xlim([xstart, xstop])
751    ax0.set_xticklabels([])
752    ax0.legend()
753
754    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
755    ax1 = plt.subplot(gs[1])
756    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
757    ax1.tick_params(direction='out')
758    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
759    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
760    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
761    ax1.set_xlim([xstart, xstop])
762    ax1.set_ylabel('Residuals')
763    plt.subplots_adjust(wspace=None, hspace=None)
764    plt.draw()
765
766
767def error_band(x, func, beta):
768    """Calculate the error band for an array of sample values x, for given fit function func with optimized parameters beta.
769
770    Returns
771    -------
772    err : np.array(Obs)
773        Error band for an array of sample values x
774    """
775    cov = covariance(beta)
776    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
777        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
778
779    deriv = []
780    for i, item in enumerate(x):
781        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
782
783    err = []
784    for i, item in enumerate(x):
785        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
786    err = np.array(err)
787
788    return err
789
790
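A sketch of evaluating the band on a grid of x values (assuming `fit_res` comes from a previous fit to an autograd-compatible `func` and the gamma method has been applied to its parameters):

```python
x_samples = np.arange(0.0, 4.0, 0.01)
band = pe.fits.error_band(x_samples, func, fit_res.fit_parameters)
```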
791def ks_test(objects=None):
792    """Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.
793
794    Parameters
795    ----------
796    objects : list
797        List of fit results to include in the analysis (optional).
798
799    Returns
800    -------
801    None
802    """
803
804    if objects is None:
805        obs_list = []
806        for obj in gc.get_objects():
807            if isinstance(obj, Fit_result):
808                obs_list.append(obj)
809    else:
810        obs_list = objects
811
812    p_values = [o.p_value for o in obs_list]
813
814    bins = len(p_values)
815    x = np.arange(0, 1.001, 0.001)
816    plt.plot(x, x, 'k', zorder=1)
817    plt.xlim(0, 1)
818    plt.ylim(0, 1)
819    plt.xlabel('p-value')
820    plt.ylabel('Cumulative probability')
821    plt.title(str(bins) + ' p-values')
822
823    n = np.arange(1, bins + 1) / np.float64(bins)
824    Xs = np.sort(p_values)
825    plt.step(Xs, n)
826    diffs = n - Xs
827    loc_max_diff = np.argmax(np.abs(diffs))
828    loc = Xs[loc_max_diff]
829    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
830    plt.draw()
831
832    print(scipy.stats.kstest(p_values, 'uniform'))