pyerrors.fits

  1import gc
  2from collections.abc import Sequence
  3import warnings
  4import numpy as np
  5import autograd.numpy as anp
  6import scipy.optimize
  7import scipy.stats
  8import matplotlib.pyplot as plt
  9from matplotlib import gridspec
 10from scipy.odr import ODR, Model, RealData
 11import iminuit
 12from autograd import jacobian as auto_jacobian
 13from autograd import hessian as auto_hessian
 14from autograd import elementwise_grad as egrad
 15from numdifftools import Jacobian as num_jacobian
 16from numdifftools import Hessian as num_hessian
 17from .obs import Obs, derived_observable, covariance, cov_Obs
 18
 19
 20class Fit_result(Sequence):
 21    """Represents fit results.
 22
 23    Attributes
 24    ----------
 25    fit_parameters : list
 26        results for the individual fit parameters,
 27        also accessible via indices.
 28    chisquare_by_dof : float
 29        reduced chisquare.
 30    p_value : float
 31        p-value of the fit
 32    t2_p_value : float
 33        Hotelling t-squared p-value for correlated fits.
 34    """
 35
 36    def __init__(self):
 37        self.fit_parameters = None
 38
 39    def __getitem__(self, idx):
 40        return self.fit_parameters[idx]
 41
 42    def __len__(self):
 43        return len(self.fit_parameters)
 44
 45    def gamma_method(self, **kwargs):
 46        """Apply the gamma method to all fit parameters"""
 47        [o.gamma_method(**kwargs) for o in self.fit_parameters]
 48
 49    def __str__(self):
 50        my_str = 'Goodness of fit:\n'
 51        if hasattr(self, 'chisquare_by_dof'):
 52            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
 53        elif hasattr(self, 'residual_variance'):
 54            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
 55        if hasattr(self, 'chisquare_by_expected_chisquare'):
 56            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
 57        if hasattr(self, 'p_value'):
 58            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
 59        if hasattr(self, 't2_p_value'):
 60            my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n'
 61        my_str += 'Fit parameters:\n'
 62        for i_par, par in enumerate(self.fit_parameters):
 63            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
 64        return my_str
 65
 66    def __repr__(self):
 67        m = max(map(len, list(self.__dict__.keys()))) + 1
 68        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])
 69
 70
 71def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 72    r'''Performs a non-linear fit to y = func(x).
 73
 74    Parameters
 75    ----------
 76    x : list
 77        list of floats.
 78    y : list
 79        list of Obs.
 80    func : object
 81        fit function, has to be of the form
 82
 83        ```python
 84        import autograd.numpy as anp
 85
 86        def func(a, x):
 87            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 88        ```
 89
 90        For multiple x values func can be of the form
 91
 92        ```python
 93        def func(a, x):
 94            (x1, x2) = x
 95            return a[0] * x1 ** 2 + a[1] * x2
 96        ```
 97
 98        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
 99        will not work.
100    priors : list, optional
101        priors has to be a list with an entry for every parameter in the fit. The entries can either be
102        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
103        0.548(23), 500(40) or 0.5(0.4)
104    silent : bool, optional
105        If true all output to the console is omitted (default False).
106    initial_guess : list
107        can provide an initial guess for the input parameters. Relevant for
108        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
109        an uncorrelated fit which then serves as guess for the correlated fit.
110    method : str, optional
111        can be used to choose an alternative method for the minimization of chisquare.
112        The possible methods are the ones which can be used for scipy.optimize.minimize and
113        migrad of iminuit. If no method is specified, Levenberg-Marquard is used.
114        Reliable alternatives are migrad, Powell and Nelder-Mead.
115    correlated_fit : bool
116        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
117        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
118        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
119        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
120        At the moment this option only works for `prior==None` and when no `method` is given.
121    expected_chisquare : bool
122        If True estimates the expected chisquare which is
123        corrected by effects caused by correlated input data (default False).
124    resplot : bool
125        If True, a plot which displays fit, data and residuals is generated (default False).
126    qqplot : bool
127        If True, a quantile-quantile plot of the fit result is generated (default False).
128    num_grad : bool
129        Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
130    '''
131    if priors is not None:
132        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
133    else:
134        return _standard_fit(x, y, func, silent=silent, **kwargs)
135
136
137def total_least_squares(x, y, func, silent=False, **kwargs):
138    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
139
140    Parameters
141    ----------
142    x : list
143        list of Obs, or a tuple of lists of Obs
144    y : list
145        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
146    func : object
147        func has to be of the form
148
149        ```python
150        import autograd.numpy as anp
151
152        def func(a, x):
153            return a[0] + a[1] * x + a[2] * anp.sinh(x)
154        ```
155
156        For multiple x values func can be of the form
157
158        ```python
159        def func(a, x):
160            (x1, x2) = x
161            return a[0] * x1 ** 2 + a[1] * x2
162        ```
163
164        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
165        will not work.
166    silent : bool, optional
167        If true all output to the console is omitted (default False).
168    initial_guess : list
169        can provide an initial guess for the input parameters. Relevant for non-linear
170        fits with many parameters.
171    expected_chisquare : bool
172        If true prints the expected chisquare which is
173        corrected by effects caused by correlated input data.
174        This can take a while as the full correlation matrix
175        has to be calculated (default False).
176    num_grad : bool
177        Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
178
179    Notes
180    -----
181    Based on the orthogonal distance regression module of scipy
182    '''
183
184    output = Fit_result()
185
186    output.fit_function = func
187
188    x = np.array(x)
189
190    x_shape = x.shape
191
192    if kwargs.get('num_grad') is True:
193        jacobian = num_jacobian
194        hessian = num_hessian
195    else:
196        jacobian = auto_jacobian
197        hessian = auto_hessian
198
199    if not callable(func):
200        raise TypeError('func has to be a function.')
201
202    for i in range(42):
203        try:
204            func(np.arange(i), x.T[0])
205        except TypeError:
206            continue
207        except IndexError:
208            continue
209        else:
210            break
211    else:
212        raise RuntimeError("Fit function is not valid.")
213
214    n_parms = i
215    if not silent:
216        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
217
218    x_f = np.vectorize(lambda o: o.value)(x)
219    dx_f = np.vectorize(lambda o: o.dvalue)(x)
220    y_f = np.array([o.value for o in y])
221    dy_f = np.array([o.dvalue for o in y])
222
223    if np.any(np.asarray(dx_f) <= 0.0):
224        raise Exception('No x errors available, run the gamma method first.')
225
226    if np.any(np.asarray(dy_f) <= 0.0):
227        raise Exception('No y errors available, run the gamma method first.')
228
229    if 'initial_guess' in kwargs:
230        x0 = kwargs.get('initial_guess')
231        if len(x0) != n_parms:
232            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
233    else:
234        x0 = [1] * n_parms
235
236    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
237    model = Model(func)
238    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
239    odr.set_job(fit_type=0, deriv=1)
240    out = odr.run()
241
242    output.residual_variance = out.res_var
243
244    output.method = 'ODR'
245
246    output.message = out.stopreason
247
248    output.xplus = out.xplus
249
250    if not silent:
251        print('Method: ODR')
252        print(*out.stopreason)
253        print('Residual variance:', output.residual_variance)
254
255    if out.info > 3:
256        raise Exception('The minimization procedure did not converge.')
257
258    m = x_f.size
259
260    def odr_chisquare(p):
261        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
262        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
263        return chisq
264
265    if kwargs.get('expected_chisquare') is True:
266        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
267
268        if kwargs.get('covariance') is not None:
269            cov = kwargs.get('covariance')
270        else:
271            cov = covariance(np.concatenate((y, x.ravel())))
272
273        number_of_x_parameters = int(m / x_f.shape[-1])
274
275        old_jac = jacobian(func)(out.beta, out.xplus)
276        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
277        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
278        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
279
280        A = W @ new_jac
281        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
282        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
283        if expected_chisquare <= 0.0:
284            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
285            expected_chisquare = np.abs(expected_chisquare)
286        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
287        if not silent:
288            print('chisquare/expected_chisquare:',
289                  output.chisquare_by_expected_chisquare)
290
291    fitp = out.beta
292    try:
293        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
294    except TypeError:
295        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
296
297    def odr_chisquare_compact_x(d):
298        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
299        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
300        return chisq
301
302    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
303
304    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
305    try:
306        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
307    except np.linalg.LinAlgError:
308        raise Exception("Cannot invert hessian matrix.")
309
310    def odr_chisquare_compact_y(d):
311        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
312        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
313        return chisq
314
315    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))
316
317    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
318    try:
319        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
320    except np.linalg.LinAlgError:
321        raise Exception("Cannot invert hessian matrix.")
322
323    result = []
324    for i in range(n_parms):
325        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
326
327    output.fit_parameters = result
328
329    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
330    output.dof = x.shape[-1] - n_parms
331    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)
332
333    return output
334
335
336def _prior_fit(x, y, func, priors, silent=False, **kwargs):
337    output = Fit_result()
338
339    output.fit_function = func
340
341    x = np.asarray(x)
342
343    if kwargs.get('num_grad') is True:
344        hessian = num_hessian
345    else:
346        hessian = auto_hessian
347
348    if not callable(func):
349        raise TypeError('func has to be a function.')
350
351    for i in range(100):
352        try:
353            func(np.arange(i), 0)
354        except TypeError:
355            continue
356        except IndexError:
357            continue
358        else:
359            break
360    else:
361        raise RuntimeError("Fit function is not valid.")
362
363    n_parms = i
364
365    if n_parms != len(priors):
366        raise Exception('Priors does not have the correct length.')
367
368    def extract_val_and_dval(string):
369        split_string = string.split('(')
370        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
371            factor = 10 ** -len(split_string[0].partition('.')[2])
372        else:
373            factor = 1
374        return float(split_string[0]), float(split_string[1][:-1]) * factor
375
376    loc_priors = []
377    for i_n, i_prior in enumerate(priors):
378        if isinstance(i_prior, Obs):
379            loc_priors.append(i_prior)
380        else:
381            loc_val, loc_dval = extract_val_and_dval(i_prior)
382            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
383
384    output.priors = loc_priors
385
386    if not silent:
387        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
388
389    y_f = [o.value for o in y]
390    dy_f = [o.dvalue for o in y]
391
392    if np.any(np.asarray(dy_f) <= 0.0):
393        raise Exception('No y errors available, run the gamma method first.')
394
395    p_f = [o.value for o in loc_priors]
396    dp_f = [o.dvalue for o in loc_priors]
397
398    if np.any(np.asarray(dp_f) <= 0.0):
399        raise Exception('No prior errors available, run the gamma method first.')
400
401    if 'initial_guess' in kwargs:
402        x0 = kwargs.get('initial_guess')
403        if len(x0) != n_parms:
404            raise Exception('Initial guess does not have the correct length.')
405    else:
406        x0 = p_f
407
408    def chisqfunc(p):
409        model = func(p, x)
410        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
411        return chisq
412
413    if not silent:
414        print('Method: migrad')
415
416    m = iminuit.Minuit(chisqfunc, x0)
417    m.errordef = 1
418    m.print_level = 0
419    if 'tol' in kwargs:
420        m.tol = kwargs.get('tol')
421    else:
422        m.tol = 1e-4
423    m.migrad()
424    params = np.asarray(m.values)
425
426    output.chisquare_by_dof = m.fval / len(x)
427
428    output.method = 'migrad'
429
430    if not silent:
431        print('chisquare/d.o.f.:', output.chisquare_by_dof)
432
433    if not m.fmin.is_valid:
434        raise Exception('The minimization procedure did not converge.')
435
436    hess = hessian(chisqfunc)(params)
437    hess_inv = np.linalg.pinv(hess)
438
439    def chisqfunc_compact(d):
440        model = func(d[:n_parms], x)
441        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
442        return chisq
443
444    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
445
446    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
447
448    result = []
449    for i in range(n_parms):
450        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
451
452    output.fit_parameters = result
453    output.chisquare = chisqfunc(np.asarray(params))
454
455    if kwargs.get('resplot') is True:
456        residual_plot(x, y, func, result)
457
458    if kwargs.get('qqplot') is True:
459        qqplot(x, y, func, result)
460
461    return output
462
463
464def _standard_fit(x, y, func, silent=False, **kwargs):
465
466    output = Fit_result()
467
468    output.fit_function = func
469
470    x = np.asarray(x)
471
472    if kwargs.get('num_grad') is True:
473        jacobian = num_jacobian
474        hessian = num_hessian
475    else:
476        jacobian = auto_jacobian
477        hessian = auto_hessian
478
479    if x.shape[-1] != len(y):
480        raise Exception('x and y input have to have the same length')
481
482    if len(x.shape) > 2:
483        raise Exception('Unknown format for x values')
484
485    if not callable(func):
486        raise TypeError('func has to be a function.')
487
488    for i in range(42):
489        try:
490            func(np.arange(i), x.T[0])
491        except TypeError:
492            continue
493        except IndexError:
494            continue
495        else:
496            break
497    else:
498        raise RuntimeError("Fit function is not valid.")
499
500    n_parms = i
501
502    if not silent:
503        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
504
505    y_f = [o.value for o in y]
506    dy_f = [o.dvalue for o in y]
507
508    if np.any(np.asarray(dy_f) <= 0.0):
509        raise Exception('No y errors available, run the gamma method first.')
510
511    if 'initial_guess' in kwargs:
512        x0 = kwargs.get('initial_guess')
513        if len(x0) != n_parms:
514            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
515    else:
516        x0 = [0.1] * n_parms
517
518    if kwargs.get('correlated_fit') is True:
519        corr = covariance(y, correlation=True, **kwargs)
520        covdiag = np.diag(1 / np.asarray(dy_f))
521        condn = np.linalg.cond(corr)
522        if condn > 0.1 / np.finfo(float).eps:
523            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
524        if condn > 1e13:
525            warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
526        chol = np.linalg.cholesky(corr)
527        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
528
529        def chisqfunc_corr(p):
530            model = func(p, x)
531            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
532            return chisq
533
534    def chisqfunc(p):
535        model = func(p, x)
536        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
537        return chisq
538
539    output.method = kwargs.get('method', 'Levenberg-Marquardt')
540    if not silent:
541        print('Method:', output.method)
542
543    if output.method != 'Levenberg-Marquardt':
544        if output.method == 'migrad':
545            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
546            if kwargs.get('correlated_fit') is True:
547                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
548            output.iterations = fit_result.nfev
549        else:
550            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
551            if kwargs.get('correlated_fit') is True:
552                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
553            output.iterations = fit_result.nit
554
555        chisquare = fit_result.fun
556
557    else:
558        if kwargs.get('correlated_fit') is True:
559            def chisqfunc_residuals_corr(p):
560                model = func(p, x)
561                chisq = anp.dot(chol_inv, (y_f - model))
562                return chisq
563
564        def chisqfunc_residuals(p):
565            model = func(p, x)
566            chisq = ((y_f - model) / dy_f)
567            return chisq
568
569        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
570        if kwargs.get('correlated_fit') is True:
571            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
572
573        chisquare = np.sum(fit_result.fun ** 2)
574        if kwargs.get('correlated_fit') is True:
575            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
576        else:
577            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
578
579        output.iterations = fit_result.nfev
580
581    if not fit_result.success:
582        raise Exception('The minimization procedure did not converge.')
583
584    if x.shape[-1] - n_parms > 0:
585        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
586    else:
587        output.chisquare_by_dof = float('nan')
588
589    output.message = fit_result.message
590    if not silent:
591        print(fit_result.message)
592        print('chisquare/d.o.f.:', output.chisquare_by_dof)
593
594    if kwargs.get('expected_chisquare') is True:
595        if kwargs.get('correlated_fit') is not True:
596            W = np.diag(1 / np.asarray(dy_f))
597            cov = covariance(y)
598            A = W @ jacobian(func)(fit_result.x, x)
599            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
600            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
601            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
602            if not silent:
603                print('chisquare/expected_chisquare:',
604                      output.chisquare_by_expected_chisquare)
605
606    fitp = fit_result.x
607    try:
608        if kwargs.get('correlated_fit') is True:
609            hess = hessian(chisqfunc_corr)(fitp)
610        else:
611            hess = hessian(chisqfunc)(fitp)
612    except TypeError:
613        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
614
615    if kwargs.get('correlated_fit') is True:
616        def chisqfunc_compact(d):
617            model = func(d[:n_parms], x)
618            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
619            return chisq
620
621    else:
622        def chisqfunc_compact(d):
623            model = func(d[:n_parms], x)
624            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
625            return chisq
626
627    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
628
629    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
630    try:
631        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
632    except np.linalg.LinAlgError:
633        raise Exception("Cannot invert hessian matrix.")
634
635    result = []
636    for i in range(n_parms):
637        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
638
639    output.fit_parameters = result
640
641    output.chisquare = chisquare
642    output.dof = x.shape[-1] - n_parms
643    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
644    # Hotelling t-squared p-value for correlated fits.
645    if kwargs.get('correlated_fit') is True:
646        n_cov = np.min(np.vectorize(lambda x: x.N)(y))
647        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
648                                                  output.dof, n_cov - output.dof)
649
650    if kwargs.get('resplot') is True:
651        residual_plot(x, y, func, result)
652
653    if kwargs.get('qqplot') is True:
654        qqplot(x, y, func, result)
655
656    return output
657
658
659def fit_lin(x, y, **kwargs):
660    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
661
662    Parameters
663    ----------
664    x : list
665        Can either be a list of floats in which case no xerror is assumed, or
666        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
667    y : list
668        List of Obs, the dvalues of the Obs are used as yerror for the fit.
669    """
670
671    def f(a, x):
672        y = a[0] + a[1] * x
673        return y
674
675    if all(isinstance(n, Obs) for n in x):
676        out = total_least_squares(x, y, f, **kwargs)
677        return out.fit_parameters
678    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
679        out = least_squares(x, y, f, **kwargs)
680        return out.fit_parameters
681    else:
682        raise Exception('Unsupported types for x')
683
684
685def qqplot(x, o_y, func, p):
686    """Generates a quantile-quantile plot of the fit result which can be used to
687       check if the residuals of the fit are gaussian distributed.
688    """
689
690    residuals = []
691    for i_x, i_y in zip(x, o_y):
692        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
693    residuals = sorted(residuals)
694    my_y = [o.value for o in residuals]
695    probplot = scipy.stats.probplot(my_y)
696    my_x = probplot[0][0]
697    plt.figure(figsize=(8, 8 / 1.618))
698    plt.errorbar(my_x, my_y, fmt='o')
699    fit_start = my_x[0]
700    fit_stop = my_x[-1]
701    samples = np.arange(fit_start, fit_stop, 0.01)
702    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
703    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
704
705    plt.xlabel('Theoretical quantiles')
706    plt.ylabel('Ordered Values')
707    plt.legend()
708    plt.draw()
709
710
711def residual_plot(x, y, func, fit_res):
712    """ Generates a plot which compares the fit to the data and displays the corresponding residuals"""
713    sorted_x = sorted(x)
714    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
715    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
716    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
717
718    plt.figure(figsize=(8, 8 / 1.618))
719    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
720    ax0 = plt.subplot(gs[0])
721    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
722    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
723    ax0.set_xticklabels([])
724    ax0.set_xlim([xstart, xstop])
725    ax0.set_xticklabels([])
726    ax0.legend()
727
728    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
729    ax1 = plt.subplot(gs[1])
730    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
731    ax1.tick_params(direction='out')
732    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
733    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
734    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
735    ax1.set_xlim([xstart, xstop])
736    ax1.set_ylabel('Residuals')
737    plt.subplots_adjust(wspace=None, hspace=None)
738    plt.draw()
739
740
741def error_band(x, func, beta):
742    """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
743    cov = covariance(beta)
744    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
745        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
746
747    deriv = []
748    for i, item in enumerate(x):
749        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
750
751    err = []
752    for i, item in enumerate(x):
753        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
754    err = np.array(err)
755
756    return err
757
758
759def ks_test(objects=None):
760    """Performs a Kolmogorov–Smirnov test for the p-values of all fit object.
761
762    Parameters
763    ----------
764    objects : list
765        List of fit results to include in the analysis (optional).
766    """
767
768    if objects is None:
769        obs_list = []
770        for obj in gc.get_objects():
771            if isinstance(obj, Fit_result):
772                obs_list.append(obj)
773    else:
774        obs_list = objects
775
776    p_values = [o.p_value for o in obs_list]
777
778    bins = len(p_values)
779    x = np.arange(0, 1.001, 0.001)
780    plt.plot(x, x, 'k', zorder=1)
781    plt.xlim(0, 1)
782    plt.ylim(0, 1)
783    plt.xlabel('p-value')
784    plt.ylabel('Cumulative probability')
785    plt.title(str(bins) + ' p-values')
786
787    n = np.arange(1, bins + 1) / np.float64(bins)
788    Xs = np.sort(p_values)
789    plt.step(Xs, n)
790    diffs = n - Xs
791    loc_max_diff = np.argmax(np.abs(diffs))
792    loc = Xs[loc_max_diff]
793    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
794    plt.draw()
795
796    print(scipy.stats.kstest(p_values, 'uniform'))
class Fit_result(collections.abc.Sequence):
21class Fit_result(Sequence):
22    """Represents fit results.
23
24    Attributes
25    ----------
26    fit_parameters : list
27        results for the individual fit parameters,
28        also accessible via indices.
29    chisquare_by_dof : float
30        reduced chisquare.
31    p_value : float
32        p-value of the fit
33    t2_p_value : float
34        Hotelling t-squared p-value for correlated fits.
35    """
36
37    def __init__(self):
38        self.fit_parameters = None
39
40    def __getitem__(self, idx):
41        return self.fit_parameters[idx]
42
43    def __len__(self):
44        return len(self.fit_parameters)
45
46    def gamma_method(self, **kwargs):
47        """Apply the gamma method to all fit parameters"""
48        [o.gamma_method(**kwargs) for o in self.fit_parameters]
49
50    def __str__(self):
51        my_str = 'Goodness of fit:\n'
52        if hasattr(self, 'chisquare_by_dof'):
53            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
54        elif hasattr(self, 'residual_variance'):
55            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
56        if hasattr(self, 'chisquare_by_expected_chisquare'):
57            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
58        if hasattr(self, 'p_value'):
59            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
60        if hasattr(self, 't2_p_value'):
61            my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n'
62        my_str += 'Fit parameters:\n'
63        for i_par, par in enumerate(self.fit_parameters):
64            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
65        return my_str
66
67    def __repr__(self):
68        m = max(map(len, list(self.__dict__.keys()))) + 1
69        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])

Represents fit results.

Attributes
  • fit_parameters (list): results for the individual fit parameters, also accessible via indices.
  • chisquare_by_dof (float): reduced chisquare.
  • p_value (float): p-value of the fit
  • t2_p_value (float): Hotelling t-squared p-value for correlated fits.
Fit_result()
37    def __init__(self):
38        self.fit_parameters = None
def gamma_method(self, **kwargs):
46    def gamma_method(self, **kwargs):
47        """Apply the gamma method to all fit parameters"""
48        [o.gamma_method(**kwargs) for o in self.fit_parameters]

Apply the gamma method to all fit parameters

Inherited Members
collections.abc.Sequence
index
count
def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 72def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 73    r'''Performs a non-linear fit to y = func(x).
 74
 75    Parameters
 76    ----------
 77    x : list
 78        list of floats.
 79    y : list
 80        list of Obs.
 81    func : object
 82        fit function, has to be of the form
 83
 84        ```python
 85        import autograd.numpy as anp
 86
 87        def func(a, x):
 88            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 89        ```
 90
 91        For multiple x values func can be of the form
 92
 93        ```python
 94        def func(a, x):
 95            (x1, x2) = x
 96            return a[0] * x1 ** 2 + a[1] * x2
 97        ```
 98
 99        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
100        will not work.
101    priors : list, optional
102        priors has to be a list with an entry for every parameter in the fit. The entries can either be
103        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
104        0.548(23), 500(40) or 0.5(0.4)
105    silent : bool, optional
106        If true all output to the console is omitted (default False).
107    initial_guess : list
108        can provide an initial guess for the input parameters. Relevant for
109        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
110        an uncorrelated fit which then serves as guess for the correlated fit.
111    method : str, optional
112        can be used to choose an alternative method for the minimization of chisquare.
113        The possible methods are the ones which can be used for scipy.optimize.minimize and
114        migrad of iminuit. If no method is specified, Levenberg-Marquard is used.
115        Reliable alternatives are migrad, Powell and Nelder-Mead.
116    correlated_fit : bool
117        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
118        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
119        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
120        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
121        At the moment this option only works for `prior==None` and when no `method` is given.
122    expected_chisquare : bool
123        If True estimates the expected chisquare which is
124        corrected by effects caused by correlated input data (default False).
125    resplot : bool
126        If True, a plot which displays fit, data and residuals is generated (default False).
127    qqplot : bool
128        If True, a quantile-quantile plot of the fit result is generated (default False).
129    num_grad : bool
130        Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
131    '''
132    if priors is not None:
133        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
134    else:
135        return _standard_fit(x, y, func, silent=silent, **kwargs)

Performs a non-linear fit to y = func(x).

Parameters
  • x (list): list of floats.
  • y (list): list of Obs.
  • func (object): fit function, has to be of the form

    import autograd.numpy as anp
    
    def func(a, x):
       return a[0] + a[1] * x + a[2] * anp.sinh(x)
    

    For multiple x values func can be of the form

    def func(a, x):
       (x1, x2) = x
       return a[0] * x1 ** 2 + a[1] * x2
    

    It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

  • priors (list, optional): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4)
  • silent (bool, optional): If true all output to the console is omitted (default False).
  • initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters. In case of correlated fits the guess is used to perform an uncorrelated fit which then serves as guess for the correlated fit.
  • method (str, optional): can be used to choose an alternative method for the minimization of chisquare. The possible methods are the ones which can be used for scipy.optimize.minimize and migrad of iminuit. If no method is specified, Levenberg-Marquard is used. Reliable alternatives are migrad, Powell and Nelder-Mead.
  • correlated_fit (bool): If True, use the full inverse covariance matrix in the definition of the chisquare cost function. For details about how the covariance matrix is estimated see pyerrors.obs.covariance. In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). At the moment this option only works for prior==None and when no method is given.
  • expected_chisquare (bool): If True estimates the expected chisquare which is corrected by effects caused by correlated input data (default False).
  • resplot (bool): If True, a plot which displays fit, data and residuals is generated (default False).
  • qqplot (bool): If True, a quantile-quantile plot of the fit result is generated (default False).
  • num_grad (bool): Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
def total_least_squares(x, y, func, silent=False, **kwargs):
138def total_least_squares(x, y, func, silent=False, **kwargs):
139    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
140
141    Parameters
142    ----------
143    x : list
144        list of Obs, or a tuple of lists of Obs
145    y : list
146        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
147    func : object
148        func has to be of the form
149
150        ```python
151        import autograd.numpy as anp
152
153        def func(a, x):
154            return a[0] + a[1] * x + a[2] * anp.sinh(x)
155        ```
156
157        For multiple x values func can be of the form
158
159        ```python
160        def func(a, x):
161            (x1, x2) = x
162            return a[0] * x1 ** 2 + a[1] * x2
163        ```
164
165        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
166        will not work.
167    silent : bool, optional
168        If true all output to the console is omitted (default False).
169    initial_guess : list
170        can provide an initial guess for the input parameters. Relevant for non-linear
171        fits with many parameters.
172    expected_chisquare : bool
173        If true prints the expected chisquare which is
174        corrected by effects caused by correlated input data.
175        This can take a while as the full correlation matrix
176        has to be calculated (default False).
177    num_grad : bool
178        Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
179
180    Notes
181    -----
182    Based on the orthogonal distance regression module of scipy
183    '''
184
185    output = Fit_result()
186
187    output.fit_function = func
188
189    x = np.array(x)
190
191    x_shape = x.shape
192
193    if kwargs.get('num_grad') is True:
194        jacobian = num_jacobian
195        hessian = num_hessian
196    else:
197        jacobian = auto_jacobian
198        hessian = auto_hessian
199
200    if not callable(func):
201        raise TypeError('func has to be a function.')
202
203    for i in range(42):
204        try:
205            func(np.arange(i), x.T[0])
206        except TypeError:
207            continue
208        except IndexError:
209            continue
210        else:
211            break
212    else:
213        raise RuntimeError("Fit function is not valid.")
214
215    n_parms = i
216    if not silent:
217        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
218
219    x_f = np.vectorize(lambda o: o.value)(x)
220    dx_f = np.vectorize(lambda o: o.dvalue)(x)
221    y_f = np.array([o.value for o in y])
222    dy_f = np.array([o.dvalue for o in y])
223
224    if np.any(np.asarray(dx_f) <= 0.0):
225        raise Exception('No x errors available, run the gamma method first.')
226
227    if np.any(np.asarray(dy_f) <= 0.0):
228        raise Exception('No y errors available, run the gamma method first.')
229
230    if 'initial_guess' in kwargs:
231        x0 = kwargs.get('initial_guess')
232        if len(x0) != n_parms:
233            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
234    else:
235        x0 = [1] * n_parms
236
237    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
238    model = Model(func)
239    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
240    odr.set_job(fit_type=0, deriv=1)
241    out = odr.run()
242
243    output.residual_variance = out.res_var
244
245    output.method = 'ODR'
246
247    output.message = out.stopreason
248
249    output.xplus = out.xplus
250
251    if not silent:
252        print('Method: ODR')
253        print(*out.stopreason)
254        print('Residual variance:', output.residual_variance)
255
256    if out.info > 3:
257        raise Exception('The minimization procedure did not converge.')
258
259    m = x_f.size
260
261    def odr_chisquare(p):
262        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
263        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
264        return chisq
265
266    if kwargs.get('expected_chisquare') is True:
267        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
268
269        if kwargs.get('covariance') is not None:
270            cov = kwargs.get('covariance')
271        else:
272            cov = covariance(np.concatenate((y, x.ravel())))
273
274        number_of_x_parameters = int(m / x_f.shape[-1])
275
276        old_jac = jacobian(func)(out.beta, out.xplus)
277        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
278        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
279        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
280
281        A = W @ new_jac
282        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
283        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
284        if expected_chisquare <= 0.0:
285            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
286            expected_chisquare = np.abs(expected_chisquare)
287        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
288        if not silent:
289            print('chisquare/expected_chisquare:',
290                  output.chisquare_by_expected_chisquare)
291
292    fitp = out.beta
293    try:
294        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
295    except TypeError:
296        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
297
298    def odr_chisquare_compact_x(d):
299        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
300        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
301        return chisq
302
303    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
304
305    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
306    try:
307        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
308    except np.linalg.LinAlgError:
309        raise Exception("Cannot invert hessian matrix.")
310
311    def odr_chisquare_compact_y(d):
312        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
313        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
314        return chisq
315
316    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))
317
318    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
319    try:
320        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
321    except np.linalg.LinAlgError:
322        raise Exception("Cannot invert hessian matrix.")
323
324    result = []
325    for i in range(n_parms):
326        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
327
328    output.fit_parameters = result
329
330    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
331    output.dof = x.shape[-1] - n_parms
332    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)
333
334    return output

Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.

Parameters
  • x (list): list of Obs, or a tuple of lists of Obs
  • y (list): list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
  • func (object): func has to be of the form

    import autograd.numpy as anp
    
    def func(a, x):
       return a[0] + a[1] * x + a[2] * anp.sinh(x)
    

    For multiple x values func can be of the form

    def func(a, x):
       (x1, x2) = x
       return a[0] * x1 ** 2 + a[1] * x2
    

    It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

  • silent (bool, optional): If true all output to the console is omitted (default False).
  • initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
  • expected_chisquare (bool): If true prints the expected chisquare which is corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False).
  • num_grad (bool): Use numerical differentation instead of automatic differentiation to perform the error propagation (default False).
Notes

Based on the orthogonal distance regression module of scipy

def fit_lin(x, y, **kwargs):
660def fit_lin(x, y, **kwargs):
661    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
662
663    Parameters
664    ----------
665    x : list
666        Can either be a list of floats in which case no xerror is assumed, or
667        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
668    y : list
669        List of Obs, the dvalues of the Obs are used as yerror for the fit.
670    """
671
672    def f(a, x):
673        y = a[0] + a[1] * x
674        return y
675
676    if all(isinstance(n, Obs) for n in x):
677        out = total_least_squares(x, y, f, **kwargs)
678        return out.fit_parameters
679    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
680        out = least_squares(x, y, f, **kwargs)
681        return out.fit_parameters
682    else:
683        raise Exception('Unsupported types for x')

Performs a linear fit to y = n + m * x and returns two Obs n, m.

Parameters
  • x (list): Can either be a list of floats in which case no xerror is assumed, or a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
  • y (list): List of Obs, the dvalues of the Obs are used as yerror for the fit.
def qqplot(x, o_y, func, p):
686def qqplot(x, o_y, func, p):
687    """Generates a quantile-quantile plot of the fit result which can be used to
688       check if the residuals of the fit are gaussian distributed.
689    """
690
691    residuals = []
692    for i_x, i_y in zip(x, o_y):
693        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
694    residuals = sorted(residuals)
695    my_y = [o.value for o in residuals]
696    probplot = scipy.stats.probplot(my_y)
697    my_x = probplot[0][0]
698    plt.figure(figsize=(8, 8 / 1.618))
699    plt.errorbar(my_x, my_y, fmt='o')
700    fit_start = my_x[0]
701    fit_stop = my_x[-1]
702    samples = np.arange(fit_start, fit_stop, 0.01)
703    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
704    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
705
706    plt.xlabel('Theoretical quantiles')
707    plt.ylabel('Ordered Values')
708    plt.legend()
709    plt.draw()

Generates a quantile-quantile plot of the fit result which can be used to check if the residuals of the fit are gaussian distributed.

def residual_plot(x, y, func, fit_res):
712def residual_plot(x, y, func, fit_res):
713    """ Generates a plot which compares the fit to the data and displays the corresponding residuals"""
714    sorted_x = sorted(x)
715    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
716    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
717    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
718
719    plt.figure(figsize=(8, 8 / 1.618))
720    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
721    ax0 = plt.subplot(gs[0])
722    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
723    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
724    ax0.set_xticklabels([])
725    ax0.set_xlim([xstart, xstop])
726    ax0.set_xticklabels([])
727    ax0.legend()
728
729    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
730    ax1 = plt.subplot(gs[1])
731    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
732    ax1.tick_params(direction='out')
733    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
734    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
735    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
736    ax1.set_xlim([xstart, xstop])
737    ax1.set_ylabel('Residuals')
738    plt.subplots_adjust(wspace=None, hspace=None)
739    plt.draw()

Generates a plot which compares the fit to the data and displays the corresponding residuals

def error_band(x, func, beta):
742def error_band(x, func, beta):
743    """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
744    cov = covariance(beta)
745    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
746        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
747
748    deriv = []
749    for i, item in enumerate(x):
750        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
751
752    err = []
753    for i, item in enumerate(x):
754        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
755    err = np.array(err)
756
757    return err

Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta.

def ks_test(objects=None):
760def ks_test(objects=None):
761    """Performs a Kolmogorov–Smirnov test for the p-values of all fit object.
762
763    Parameters
764    ----------
765    objects : list
766        List of fit results to include in the analysis (optional).
767    """
768
769    if objects is None:
770        obs_list = []
771        for obj in gc.get_objects():
772            if isinstance(obj, Fit_result):
773                obs_list.append(obj)
774    else:
775        obs_list = objects
776
777    p_values = [o.p_value for o in obs_list]
778
779    bins = len(p_values)
780    x = np.arange(0, 1.001, 0.001)
781    plt.plot(x, x, 'k', zorder=1)
782    plt.xlim(0, 1)
783    plt.ylim(0, 1)
784    plt.xlabel('p-value')
785    plt.ylabel('Cumulative probability')
786    plt.title(str(bins) + ' p-values')
787
788    n = np.arange(1, bins + 1) / np.float64(bins)
789    Xs = np.sort(p_values)
790    plt.step(Xs, n)
791    diffs = n - Xs
792    loc_max_diff = np.argmax(np.abs(diffs))
793    loc = Xs[loc_max_diff]
794    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
795    plt.draw()
796
797    print(scipy.stats.kstest(p_values, 'uniform'))

Performs a Kolmogorov–Smirnov test for the p-values of all fit object.

Parameters
  • objects (list): List of fit results to include in the analysis (optional).