pyerrors.fits

  1import gc
  2from collections.abc import Sequence
  3import warnings
  4import numpy as np
  5import autograd.numpy as anp
  6import scipy.optimize
  7import scipy.stats
  8import matplotlib.pyplot as plt
  9from matplotlib import gridspec
 10from scipy.odr import ODR, Model, RealData
 11import iminuit
 12from autograd import jacobian as auto_jacobian
 13from autograd import hessian as auto_hessian
 14from autograd import elementwise_grad as egrad
 15from numdifftools import Jacobian as num_jacobian
 16from numdifftools import Hessian as num_hessian
 17from .obs import Obs, derived_observable, covariance, cov_Obs
 18
 19
 20class Fit_result(Sequence):
 21    """Represents fit results.
 22
 23    Attributes
 24    ----------
 25    fit_parameters : list
 26        results for the individual fit parameters,
 27        also accessible via indices.
 28    chisquare_by_dof : float
 29        reduced chisquare.
 30    p_value : float
 31        p-value of the fit
 32    t2_p_value : float
 33        Hotelling t-squared p-value for correlated fits.
 34    """
 35
 36    def __init__(self):
 37        self.fit_parameters = None
 38
 39    def __getitem__(self, idx):
 40        return self.fit_parameters[idx]
 41
 42    def __len__(self):
 43        return len(self.fit_parameters)
 44
 45    def gamma_method(self, **kwargs):
 46        """Apply the gamma method to all fit parameters"""
 47        [o.gamma_method(**kwargs) for o in self.fit_parameters]
 48
 49    gm = gamma_method
 50
 51    def __str__(self):
 52        my_str = 'Goodness of fit:\n'
 53        if hasattr(self, 'chisquare_by_dof'):
 54            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
 55        elif hasattr(self, 'residual_variance'):
 56            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
 57        if hasattr(self, 'chisquare_by_expected_chisquare'):
 58            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
 59        if hasattr(self, 'p_value'):
 60            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
 61        if hasattr(self, 't2_p_value'):
 62            my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n'
 63        my_str += 'Fit parameters:\n'
 64        for i_par, par in enumerate(self.fit_parameters):
 65            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
 66        return my_str
 67
 68    def __repr__(self):
 69        m = max(map(len, list(self.__dict__.keys()))) + 1
 70        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])
 71
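A minimal sketch of how a `Fit_result` is typically handled; `fit_res` is assumed to be the return value of one of the fit routines below.

```python
# fit_res: a Fit_result as returned by least_squares or total_least_squares (assumed)
fit_res.gamma_method()   # propagate errors to all fit parameters (alias: fit_res.gm())
print(fit_res)           # goodness-of-fit summary and parameter list
first_par = fit_res[0]   # sequence access to the individual Obs
n_par = len(fit_res)
```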
 72
 73def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 74    r'''Performs a non-linear fit to y = func(x).
 75
 76    Parameters
 77    ----------
 78    x : list
 79        list of floats.
 80    y : list
 81        list of Obs.
 82    func : object
 83        fit function, has to be of the form
 84
 85        ```python
 86        import autograd.numpy as anp
 87
 88        def func(a, x):
 89            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 90        ```
 91
 92        For multiple x values func can be of the form
 93
 94        ```python
 95        def func(a, x):
 96            (x1, x2) = x
 97            return a[0] * x1 ** 2 + a[1] * x2
 98        ```
 99
100        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
101        will not work.
102    priors : list, optional
103        priors has to be a list with an entry for every parameter in the fit. The entries can either be
104        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
105        0.548(23), 500(40) or 0.5(0.4)
106    silent : bool, optional
107        If true all output to the console is omitted (default False).
108    initial_guess : list
109        can provide an initial guess for the input parameters. Relevant for
110        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
111        an uncorrelated fit which then serves as guess for the correlated fit.
112    method : str, optional
113        can be used to choose an alternative method for the minimization of chisquare.
114        The possible methods are the ones which can be used for scipy.optimize.minimize and
115        migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
116        Reliable alternatives are migrad, Powell and Nelder-Mead.
117    correlated_fit : bool
118        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
119        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
120        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
121        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
122        At the moment this option only works for `priors==None` and when no `method` is given.
123    expected_chisquare : bool
124        If True estimates the expected chisquare which is
125        corrected by effects caused by correlated input data (default False).
126    resplot : bool
127        If True, a plot which displays fit, data and residuals is generated (default False).
128    qqplot : bool
129        If True, a quantile-quantile plot of the fit result is generated (default False).
130    num_grad : bool
131        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
132    '''
133    if priors is not None:
134        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
135    else:
136        return _standard_fit(x, y, func, silent=silent, **kwargs)
137
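A minimal usage sketch with synthetic data; all names (`func`, `x_vals`, `y_obs`, `fit_res`) are illustrative and the data are generated with `pyerrors.pseudo_Obs`.

```python
import autograd.numpy as anp
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * anp.exp(-a[2] * x)

x_vals = [1., 2., 3., 4., 5., 6.]
# synthetic y data with absolute errors of 0.01 on a single ensemble 'ens'
y_obs = [pe.pseudo_Obs(func([0.1, 1.0, 0.5], xv), 0.01, 'ens') for xv in x_vals]
[o.gamma_method() for o in y_obs]

fit_res = pe.fits.least_squares(x_vals, y_obs, func, initial_guess=[0.1, 1.0, 0.5])
fit_res.gamma_method()   # propagate errors to the fit parameters
print(fit_res)
```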
138
139def total_least_squares(x, y, func, silent=False, **kwargs):
140    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
141
142    Parameters
143    ----------
144    x : list
145        list of Obs, or a tuple of lists of Obs
146    y : list
147        list of Obs. The dvalues of the Obs are used as x- and y-errors for the fit.
148    func : object
149        func has to be of the form
150
151        ```python
152        import autograd.numpy as anp
153
154        def func(a, x):
155            return a[0] + a[1] * x + a[2] * anp.sinh(x)
156        ```
157
158        For multiple x values func can be of the form
159
160        ```python
161        def func(a, x):
162            (x1, x2) = x
163            return a[0] * x1 ** 2 + a[1] * x2
164        ```
165
166        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
167        will not work.
168    silent : bool, optional
169        If true all output to the console is omitted (default False).
170    initial_guess : list
171        can provide an initial guess for the input parameters. Relevant for non-linear
172        fits with many parameters.
173    expected_chisquare : bool
174        If true prints the expected chisquare which is
175        corrected by effects caused by correlated input data.
176        This can take a while as the full correlation matrix
177        has to be calculated (default False).
178    num_grad : bool
179        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
180
181    Notes
182    -----
183    Based on the orthogonal distance regression module of scipy
184    '''
185
186    output = Fit_result()
187
188    output.fit_function = func
189
190    x = np.array(x)
191
192    x_shape = x.shape
193
194    if kwargs.get('num_grad') is True:
195        jacobian = num_jacobian
196        hessian = num_hessian
197    else:
198        jacobian = auto_jacobian
199        hessian = auto_hessian
200
201    if not callable(func):
202        raise TypeError('func has to be a function.')
203
204    for i in range(42):
205        try:
206            func(np.arange(i), x.T[0])
207        except TypeError:
208            continue
209        except IndexError:
210            continue
211        else:
212            break
213    else:
214        raise RuntimeError("Fit function is not valid.")
215
216    n_parms = i
217    if not silent:
218        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
219
220    x_f = np.vectorize(lambda o: o.value)(x)
221    dx_f = np.vectorize(lambda o: o.dvalue)(x)
222    y_f = np.array([o.value for o in y])
223    dy_f = np.array([o.dvalue for o in y])
224
225    if np.any(np.asarray(dx_f) <= 0.0):
226        raise Exception('No x errors available, run the gamma method first.')
227
228    if np.any(np.asarray(dy_f) <= 0.0):
229        raise Exception('No y errors available, run the gamma method first.')
230
231    if 'initial_guess' in kwargs:
232        x0 = kwargs.get('initial_guess')
233        if len(x0) != n_parms:
234            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
235    else:
236        x0 = [1] * n_parms
237
238    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
239    model = Model(func)
240    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
241    odr.set_job(fit_type=0, deriv=1)
242    out = odr.run()
243
244    output.residual_variance = out.res_var
245
246    output.method = 'ODR'
247
248    output.message = out.stopreason
249
250    output.xplus = out.xplus
251
252    if not silent:
253        print('Method: ODR')
254        print(*out.stopreason)
255        print('Residual variance:', output.residual_variance)
256
257    if out.info > 3:
258        raise Exception('The minimization procedure did not converge.')
259
260    m = x_f.size
261
262    def odr_chisquare(p):
263        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
264        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
265        return chisq
266
267    if kwargs.get('expected_chisquare') is True:
268        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
269
270        if kwargs.get('covariance') is not None:
271            cov = kwargs.get('covariance')
272        else:
273            cov = covariance(np.concatenate((y, x.ravel())))
274
275        number_of_x_parameters = int(m / x_f.shape[-1])
276
277        old_jac = jacobian(func)(out.beta, out.xplus)
278        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
279        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
280        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
281
282        A = W @ new_jac
283        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
284        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
285        if expected_chisquare <= 0.0:
286            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
287            expected_chisquare = np.abs(expected_chisquare)
288        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
289        if not silent:
290            print('chisquare/expected_chisquare:',
291                  output.chisquare_by_expected_chisquare)
292
293    fitp = out.beta
294    try:
295        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
296    except TypeError:
297        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
298
299    def odr_chisquare_compact_x(d):
300        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
301        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
302        return chisq
303
304    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
305
306    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
307    try:
308        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
309    except np.linalg.LinAlgError:
310        raise Exception("Cannot invert hessian matrix.")
311
312    def odr_chisquare_compact_y(d):
313        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
314        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
315        return chisq
316
317    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))
318
319    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
320    try:
321        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
322    except np.linalg.LinAlgError:
323        raise Exception("Cannot invert hessian matrix.")
324
325    result = []
326    for i in range(n_parms):
327        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
328
329    output.fit_parameters = result
330
331    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
332    output.dof = x.shape[-1] - n_parms
333    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)
334
335    return output
336
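A sketch of an orthogonal distance fit where the x values carry errors as well; the data are synthetic and the names (`lin_func`, `ox`, `oy`, `odr_res`) are illustrative.

```python
import pyerrors as pe

def lin_func(a, x):
    return a[0] + a[1] * x

ox = [pe.pseudo_Obs(xv, 0.05, 'ens') for xv in [1., 2., 3., 4.]]
oy = [pe.pseudo_Obs(0.3 + 1.1 * xv, 0.10, 'ens') for xv in [1., 2., 3., 4.]]
[o.gamma_method() for o in ox + oy]

odr_res = pe.fits.total_least_squares(ox, oy, lin_func)
odr_res.gamma_method()
print(odr_res)
```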
337
338def _prior_fit(x, y, func, priors, silent=False, **kwargs):
339    output = Fit_result()
340
341    output.fit_function = func
342
343    x = np.asarray(x)
344
345    if kwargs.get('num_grad') is True:
346        hessian = num_hessian
347    else:
348        hessian = auto_hessian
349
350    if not callable(func):
351        raise TypeError('func has to be a function.')
352
353    for i in range(100):
354        try:
355            func(np.arange(i), 0)
356        except TypeError:
357            continue
358        except IndexError:
359            continue
360        else:
361            break
362    else:
363        raise RuntimeError("Fit function is not valid.")
364
365    n_parms = i
366
367    if n_parms != len(priors):
368        raise Exception('Priors do not have the correct length.')
369
370    def extract_val_and_dval(string):
371        split_string = string.split('(')
372        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
373            factor = 10 ** -len(split_string[0].partition('.')[2])
374        else:
375            factor = 1
376        return float(split_string[0]), float(split_string[1][:-1]) * factor
377
378    loc_priors = []
379    for i_n, i_prior in enumerate(priors):
380        if isinstance(i_prior, Obs):
381            loc_priors.append(i_prior)
382        else:
383            loc_val, loc_dval = extract_val_and_dval(i_prior)
384            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
385
386    output.priors = loc_priors
387
388    if not silent:
389        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
390
391    y_f = [o.value for o in y]
392    dy_f = [o.dvalue for o in y]
393
394    if np.any(np.asarray(dy_f) <= 0.0):
395        raise Exception('No y errors available, run the gamma method first.')
396
397    p_f = [o.value for o in loc_priors]
398    dp_f = [o.dvalue for o in loc_priors]
399
400    if np.any(np.asarray(dp_f) <= 0.0):
401        raise Exception('No prior errors available, run the gamma method first.')
402
403    if 'initial_guess' in kwargs:
404        x0 = kwargs.get('initial_guess')
405        if len(x0) != n_parms:
406            raise Exception('Initial guess does not have the correct length.')
407    else:
408        x0 = p_f
409
410    def chisqfunc(p):
411        model = func(p, x)
412        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
413        return chisq
414
415    if not silent:
416        print('Method: migrad')
417
418    m = iminuit.Minuit(chisqfunc, x0)
419    m.errordef = 1
420    m.print_level = 0
421    if 'tol' in kwargs:
422        m.tol = kwargs.get('tol')
423    else:
424        m.tol = 1e-4
425    m.migrad()
426    params = np.asarray(m.values)
427
428    output.chisquare_by_dof = m.fval / len(x)
429
430    output.method = 'migrad'
431
432    if not silent:
433        print('chisquare/d.o.f.:', output.chisquare_by_dof)
434
435    if not m.fmin.is_valid:
436        raise Exception('The minimization procedure did not converge.')
437
438    hess = hessian(chisqfunc)(params)
439    hess_inv = np.linalg.pinv(hess)
440
441    def chisqfunc_compact(d):
442        model = func(d[:n_parms], x)
443        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
444        return chisq
445
446    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
447
448    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
449
450    result = []
451    for i in range(n_parms):
452        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
453
454    output.fit_parameters = result
455    output.chisquare = chisqfunc(np.asarray(params))
456
457    if kwargs.get('resplot') is True:
458        residual_plot(x, y, func, result)
459
460    if kwargs.get('qqplot') is True:
461        qqplot(x, y, func, result)
462
463    return output
464
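A sketch of a constrained fit using the prior format described in the `least_squares` docstring; string priors such as `'0.1(2)'` are parsed into value 0.1 and error 0.2. The names `func`, `x_vals` and `y_obs` refer to the `least_squares` sketch above.

```python
import pyerrors as pe

# one prior per fit parameter, given as Obs or as value(error) strings
prior_res = pe.fits.least_squares(x_vals, y_obs, func,
                                  priors=['0.1(2)', '1.0(5)', '0.5(3)'])
prior_res.gamma_method()
print(prior_res)
```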
465
466def _standard_fit(x, y, func, silent=False, **kwargs):
467
468    output = Fit_result()
469
470    output.fit_function = func
471
472    x = np.asarray(x)
473
474    if kwargs.get('num_grad') is True:
475        jacobian = num_jacobian
476        hessian = num_hessian
477    else:
478        jacobian = auto_jacobian
479        hessian = auto_hessian
480
481    if x.shape[-1] != len(y):
482        raise Exception('x and y input have to have the same length')
483
484    if len(x.shape) > 2:
485        raise Exception('Unknown format for x values')
486
487    if not callable(func):
488        raise TypeError('func has to be a function.')
489
490    for i in range(42):
491        try:
492            func(np.arange(i), x.T[0])
493        except TypeError:
494            continue
495        except IndexError:
496            continue
497        else:
498            break
499    else:
500        raise RuntimeError("Fit function is not valid.")
501
502    n_parms = i
503
504    if not silent:
505        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
506
507    y_f = [o.value for o in y]
508    dy_f = [o.dvalue for o in y]
509
510    if np.any(np.asarray(dy_f) <= 0.0):
511        raise Exception('No y errors available, run the gamma method first.')
512
513    if 'initial_guess' in kwargs:
514        x0 = kwargs.get('initial_guess')
515        if len(x0) != n_parms:
516            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
517    else:
518        x0 = [0.1] * n_parms
519
520    if kwargs.get('correlated_fit') is True:
521        corr = covariance(y, correlation=True, **kwargs)
522        covdiag = np.diag(1 / np.asarray(dy_f))
523        condn = np.linalg.cond(corr)
524        if condn > 0.1 / np.finfo(float).eps:
525            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
526        if condn > 1e13:
527            warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
528        chol = np.linalg.cholesky(corr)
529        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
530
531        def chisqfunc_corr(p):
532            model = func(p, x)
533            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
534            return chisq
535
536    def chisqfunc(p):
537        model = func(p, x)
538        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
539        return chisq
540
541    output.method = kwargs.get('method', 'Levenberg-Marquardt')
542    if not silent:
543        print('Method:', output.method)
544
545    if output.method != 'Levenberg-Marquardt':
546        if output.method == 'migrad':
547            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
548            if kwargs.get('correlated_fit') is True:
549                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
550            output.iterations = fit_result.nfev
551        else:
552            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
553            if kwargs.get('correlated_fit') is True:
554                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
555            output.iterations = fit_result.nit
556
557        chisquare = fit_result.fun
558
559    else:
560        if kwargs.get('correlated_fit') is True:
561            def chisqfunc_residuals_corr(p):
562                model = func(p, x)
563                chisq = anp.dot(chol_inv, (y_f - model))
564                return chisq
565
566        def chisqfunc_residuals(p):
567            model = func(p, x)
568            chisq = ((y_f - model) / dy_f)
569            return chisq
570
571        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
572        if kwargs.get('correlated_fit') is True:
573            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
574
575        chisquare = np.sum(fit_result.fun ** 2)
576        if kwargs.get('correlated_fit') is True:
577            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
578        else:
579            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
580
581        output.iterations = fit_result.nfev
582
583    if not fit_result.success:
584        raise Exception('The minimization procedure did not converge.')
585
586    if x.shape[-1] - n_parms > 0:
587        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
588    else:
589        output.chisquare_by_dof = float('nan')
590
591    output.message = fit_result.message
592    if not silent:
593        print(fit_result.message)
594        print('chisquare/d.o.f.:', output.chisquare_by_dof)
595
596    if kwargs.get('expected_chisquare') is True:
597        if kwargs.get('correlated_fit') is not True:
598            W = np.diag(1 / np.asarray(dy_f))
599            cov = covariance(y)
600            A = W @ jacobian(func)(fit_result.x, x)
601            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
602            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
603            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
604            if not silent:
605                print('chisquare/expected_chisquare:',
606                      output.chisquare_by_expected_chisquare)
607
608    fitp = fit_result.x
609    try:
610        if kwargs.get('correlated_fit') is True:
611            hess = hessian(chisqfunc_corr)(fitp)
612        else:
613            hess = hessian(chisqfunc)(fitp)
614    except TypeError:
615        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
616
617    if kwargs.get('correlated_fit') is True:
618        def chisqfunc_compact(d):
619            model = func(d[:n_parms], x)
620            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
621            return chisq
622
623    else:
624        def chisqfunc_compact(d):
625            model = func(d[:n_parms], x)
626            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
627            return chisq
628
629    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
630
631    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
632    try:
633        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
634    except np.linalg.LinAlgError:
635        raise Exception("Cannot invert hessian matrix.")
636
637    result = []
638    for i in range(n_parms):
639        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
640
641    output.fit_parameters = result
642
643    output.chisquare = chisquare
644    output.dof = x.shape[-1] - n_parms
645    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
646    # Hotelling t-squared p-value for correlated fits.
647    if kwargs.get('correlated_fit') is True:
648        n_cov = np.min(np.vectorize(lambda x: x.N)(y))
649        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
650                                                  output.dof, n_cov - output.dof)
651
652    if kwargs.get('resplot') is True:
653        residual_plot(x, y, func, result)
654
655    if kwargs.get('qqplot') is True:
656        qqplot(x, y, func, result)
657
658    return output
659
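A sketch of a correlated fit with the same illustrative data as above; the chisquare then uses the full inverse covariance (via the Cholesky-decomposed correlation matrix) and a Hotelling t-squared p-value is reported.

```python
import pyerrors as pe

corr_res = pe.fits.least_squares(x_vals, y_obs, func, correlated_fit=True, resplot=True)
corr_res.gamma_method()
print(corr_res.t2_p_value)   # Hotelling t-squared p-value for the correlated fit
```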
660
661def fit_lin(x, y, **kwargs):
662    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
663
664    Parameters
665    ----------
666    x : list
667        Can either be a list of floats in which case no xerror is assumed, or
668        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
669    y : list
670        List of Obs, the dvalues of the Obs are used as yerror for the fit.
671    """
672
673    def f(a, x):
674        y = a[0] + a[1] * x
675        return y
676
677    if all(isinstance(n, Obs) for n in x):
678        out = total_least_squares(x, y, f, **kwargs)
679        return out.fit_parameters
680    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
681        out = least_squares(x, y, f, **kwargs)
682        return out.fit_parameters
683    else:
684        raise Exception('Unsupported types for x')
685
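`fit_lin` dispatches to `least_squares` for plain x values and to `total_least_squares` when the x values are `Obs`; a short sketch reusing the illustrative `ox`, `oy` from the `total_least_squares` example above.

```python
import pyerrors as pe

n, m = pe.fits.fit_lin(ox, oy)   # x values are Obs -> orthogonal distance regression
n.gamma_method()
m.gamma_method()
print(n, m)
```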
686
687def qqplot(x, o_y, func, p):
688    """Generates a quantile-quantile plot of the fit result which can be used to
689       check if the residuals of the fit are Gaussian distributed.
690    """
691
692    residuals = []
693    for i_x, i_y in zip(x, o_y):
694        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
695    residuals = sorted(residuals)
696    my_y = [o.value for o in residuals]
697    probplot = scipy.stats.probplot(my_y)
698    my_x = probplot[0][0]
699    plt.figure(figsize=(8, 8 / 1.618))
700    plt.errorbar(my_x, my_y, fmt='o')
701    fit_start = my_x[0]
702    fit_stop = my_x[-1]
703    samples = np.arange(fit_start, fit_stop, 0.01)
704    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
705    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
706
707    plt.xlabel('Theoretical quantiles')
708    plt.ylabel('Ordered Values')
709    plt.legend()
710    plt.draw()
711
712
713def residual_plot(x, y, func, fit_res):
714    """ Generates a plot which compares the fit to the data and displays the corresponding residuals"""
715    sorted_x = sorted(x)
716    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
717    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
718    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
719
720    plt.figure(figsize=(8, 8 / 1.618))
721    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
722    ax0 = plt.subplot(gs[0])
723    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
724    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
725    ax0.set_xticklabels([])
726    ax0.set_xlim([xstart, xstop])
727    ax0.set_xticklabels([])
728    ax0.legend()
729
730    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
731    ax1 = plt.subplot(gs[1])
732    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
733    ax1.tick_params(direction='out')
734    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
735    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
736    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
737    ax1.set_xlim([xstart, xstop])
738    ax1.set_ylabel('Residuals')
739    plt.subplots_adjust(wspace=None, hspace=None)
740    plt.draw()
741
742
743def error_band(x, func, beta):
744    """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
745    cov = covariance(beta)
746    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
747        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
748
749    deriv = []
750    for i, item in enumerate(x):
751        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
752
753    err = []
754    for i, item in enumerate(x):
755        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
756    err = np.array(err)
757
758    return err
759
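A sketch drawing a one-sigma band around a fitted curve; `func` and `fit_res` refer to the `least_squares` sketch above, and the fit parameters must carry errors (gamma method applied) so that their covariance can be estimated.

```python
import numpy as np
import matplotlib.pyplot as plt
import pyerrors as pe

x_dense = np.linspace(1., 6., 200)
band = pe.fits.error_band(x_dense, func, fit_res.fit_parameters)
y_central = func([p.value for p in fit_res.fit_parameters], x_dense)
plt.fill_between(x_dense, y_central - band, y_central + band, alpha=0.3)
plt.show()
```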
760
761def ks_test(objects=None):
762    """Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.
763
764    Parameters
765    ----------
766    objects : list
767        List of fit results to include in the analysis (optional).
768    """
769
770    if objects is None:
771        obs_list = []
772        for obj in gc.get_objects():
773            if isinstance(obj, Fit_result):
774                obs_list.append(obj)
775    else:
776        obs_list = objects
777
778    p_values = [o.p_value for o in obs_list]
779
780    bins = len(p_values)
781    x = np.arange(0, 1.001, 0.001)
782    plt.plot(x, x, 'k', zorder=1)
783    plt.xlim(0, 1)
784    plt.ylim(0, 1)
785    plt.xlabel('p-value')
786    plt.ylabel('Cumulative probability')
787    plt.title(str(bins) + ' p-values')
788
789    n = np.arange(1, bins + 1) / np.float64(bins)
790    Xs = np.sort(p_values)
791    plt.step(Xs, n)
792    diffs = n - Xs
793    loc_max_diff = np.argmax(np.abs(diffs))
794    loc = Xs[loc_max_diff]
795    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
796    plt.draw()
797
798    print(scipy.stats.kstest(p_values, 'uniform'))