pyerrors.fits

  0import gc
  1from collections.abc import Sequence
  2import warnings
  3import numpy as np
  4import autograd.numpy as anp
  5import scipy.optimize
  6import scipy.stats
  7import matplotlib.pyplot as plt
  8from matplotlib import gridspec
  9from scipy.odr import ODR, Model, RealData
 10from scipy.stats import chi2
 11import iminuit
 12from autograd import jacobian
 13from autograd import elementwise_grad as egrad
 14from .obs import Obs, derived_observable, covariance, cov_Obs
 15
 16
 17class Fit_result(Sequence):
 18    """Represents fit results.
 19
 20    Attributes
 21    ----------
 22    fit_parameters : list
 23        results for the individual fit parameters,
 24        also accessible via indices.
 25    """
 26
 27    def __init__(self):
 28        self.fit_parameters = None
 29
 30    def __getitem__(self, idx):
 31        return self.fit_parameters[idx]
 32
 33    def __len__(self):
 34        return len(self.fit_parameters)
 35
 36    def gamma_method(self):
 37        """Apply the gamma method to all fit parameters"""
 38        [o.gamma_method() for o in self.fit_parameters]
 39
 40    def __str__(self):
 41        my_str = 'Goodness of fit:\n'
 42        if hasattr(self, 'chisquare_by_dof'):
 43            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
 44        elif hasattr(self, 'residual_variance'):
 45            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
 46        if hasattr(self, 'chisquare_by_expected_chisquare'):
 47            my_str += '\u03C7\u00b2/\u03C7\u00b2exp  = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
 48        if hasattr(self, 'p_value'):
 49            my_str += 'p-value   = ' + f'{self.p_value:2.4f}' + '\n'
 50        my_str += 'Fit parameters:\n'
 51        for i_par, par in enumerate(self.fit_parameters):
 52            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
 53        return my_str
 54
 55    def __repr__(self):
 56        m = max(map(len, list(self.__dict__.keys()))) + 1
 57        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])
 58
 59
 60def least_squares(x, y, func, priors=None, silent=False, **kwargs):
 61    r'''Performs a non-linear fit to y = func(x).
 62
 63    Parameters
 64    ----------
 65    x : list
 66        list of floats.
 67    y : list
 68        list of Obs.
 69    func : object
 70        fit function, has to be of the form
 71
 72        ```python
 73        import autograd.numpy as anp
 74
 75        def func(a, x):
 76            return a[0] + a[1] * x + a[2] * anp.sinh(x)
 77        ```
 78
 79        For multiple x values func can be of the form
 80
 81        ```python
 82        def func(a, x):
 83            (x1, x2) = x
 84            return a[0] * x1 ** 2 + a[1] * x2
 85        ```
 86
 87        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
 88        will not work.
 89    priors : list, optional
 90        priors has to be a list with an entry for every parameter in the fit. The entries can either be
 91        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
 92        0.548(23), 500(40) or 0.5(0.4)
 93    silent : bool, optional
 94        If true all output to the console is omitted (default False).
 95    initial_guess : list
 96        can provide an initial guess for the input parameters. Relevant for
 97        non-linear fits with many parameters.
 98    method : str, optional
 99        can be used to choose an alternative method for the minimization of chisquare.
100        The possible methods are the ones which can be used for scipy.optimize.minimize and
101        migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
102        Reliable alternatives are migrad, Powell and Nelder-Mead.
103    correlated_fit : bool
104        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
105        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
106        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
107        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
108        At the moment this option only works for `priors=None` and when no `method` is given.
109    expected_chisquare : bool
110        If True estimates the expected chisquare which is
111        corrected by effects caused by correlated input data (default False).
112    resplot : bool
113        If True, a plot which displays fit, data and residuals is generated (default False).
114    qqplot : bool
115        If True, a quantile-quantile plot of the fit result is generated (default False).
116    '''
117    if priors is not None:
118        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
119    else:
120        return _standard_fit(x, y, func, silent=silent, **kwargs)
121
122
123def total_least_squares(x, y, func, silent=False, **kwargs):
124    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
125
126    Parameters
127    ----------
128    x : list
129        list of Obs, or a tuple of lists of Obs
130    y : list
131        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
132    func : object
133        func has to be of the form
134
135        ```python
136        import autograd.numpy as anp
137
138        def func(a, x):
139            return a[0] + a[1] * x + a[2] * anp.sinh(x)
140        ```
141
142        For multiple x values func can be of the form
143
144        ```python
145        def func(a, x):
146            (x1, x2) = x
147            return a[0] * x1 ** 2 + a[1] * x2
148        ```
149
150        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
151        will not work.
152    silent : bool, optional
153        If true all output to the console is omitted (default False).
154    initial_guess : list
155        can provide an initial guess for the input parameters. Relevant for non-linear
156        fits with many parameters.
157    expected_chisquare : bool
158        If true prints the expected chisquare which is
159        corrected by effects caused by correlated input data.
160        This can take a while as the full correlation matrix
161        has to be calculated (default False).
162
163    Notes
164    -----
165    Based on the orthogonal distance regression module of scipy
166    '''
167
168    output = Fit_result()
169
170    output.fit_function = func
171
172    x = np.array(x)
173
174    x_shape = x.shape
175
176    if not callable(func):
177        raise TypeError('func has to be a function.')
178
179    for i in range(25):
180        try:
181            func(np.arange(i), x.T[0])
182        except Exception:
183            pass
184        else:
185            break
186
187    n_parms = i
188    if not silent:
189        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
190
191    x_f = np.vectorize(lambda o: o.value)(x)
192    dx_f = np.vectorize(lambda o: o.dvalue)(x)
193    y_f = np.array([o.value for o in y])
194    dy_f = np.array([o.dvalue for o in y])
195
196    if np.any(np.asarray(dx_f) <= 0.0):
197        raise Exception('No x errors available, run the gamma method first.')
198
199    if np.any(np.asarray(dy_f) <= 0.0):
200        raise Exception('No y errors available, run the gamma method first.')
201
202    if 'initial_guess' in kwargs:
203        x0 = kwargs.get('initial_guess')
204        if len(x0) != n_parms:
205            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
206    else:
207        x0 = [1] * n_parms
208
209    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
210    model = Model(func)
211    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
212    odr.set_job(fit_type=0, deriv=1)
213    out = odr.run()
214
215    output.residual_variance = out.res_var
216
217    output.method = 'ODR'
218
219    output.message = out.stopreason
220
221    output.xplus = out.xplus
222
223    if not silent:
224        print('Method: ODR')
225        print(*out.stopreason)
226        print('Residual variance:', output.residual_variance)
227
228    if out.info > 3:
229        raise Exception('The minimization procedure did not converge.')
230
231    m = x_f.size
232
233    def odr_chisquare(p):
234        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
235        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
236        return chisq
237
238    if kwargs.get('expected_chisquare') is True:
239        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
240
241        if kwargs.get('covariance') is not None:
242            cov = kwargs.get('covariance')
243        else:
244            cov = covariance(np.concatenate((y, x.ravel())))
245
246        number_of_x_parameters = int(m / x_f.shape[-1])
247
248        old_jac = jacobian(func)(out.beta, out.xplus)
249        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
250        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
251        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
252
253        A = W @ new_jac
254        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
255        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
256        if expected_chisquare <= 0.0:
257            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
258            expected_chisquare = np.abs(expected_chisquare)
259        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
260        if not silent:
261            print('chisquare/expected_chisquare:',
262                  output.chisquare_by_expected_chisquare)
263
264    fitp = out.beta
265    try:
266        hess_inv = np.linalg.pinv(jacobian(jacobian(odr_chisquare))(np.concatenate((fitp, out.xplus.ravel()))))
267    except TypeError:
268        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
269
270    def odr_chisquare_compact_x(d):
271        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
272        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
273        return chisq
274
275    jac_jac_x = jacobian(jacobian(odr_chisquare_compact_x))(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))
276
277    deriv_x = -hess_inv @ jac_jac_x[:n_parms + m, n_parms + m:]
278
279    def odr_chisquare_compact_y(d):
280        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
281        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
282        return chisq
283
284    jac_jac_y = jacobian(jacobian(odr_chisquare_compact_y))(np.concatenate((fitp, out.xplus.ravel(), y_f)))
285
286    deriv_y = -hess_inv @ jac_jac_y[:n_parms + m, n_parms + m:]
287
288    result = []
289    for i in range(n_parms):
290        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))
291
292    output.fit_parameters = result
293
294    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
295    output.dof = x.shape[-1] - n_parms
296    output.p_value = 1 - chi2.cdf(output.odr_chisquare, output.dof)
297
298    return output
299
300
301def _prior_fit(x, y, func, priors, silent=False, **kwargs):
302    output = Fit_result()
303
304    output.fit_function = func
305
306    x = np.asarray(x)
307
308    if not callable(func):
309        raise TypeError('func has to be a function.')
310
311    for i in range(100):
312        try:
313            func(np.arange(i), 0)
314        except Exception:
315            pass
316        else:
317            break
318
319    n_parms = i
320
321    if n_parms != len(priors):
322        raise Exception('Priors does not have the correct length.')
323
324    def extract_val_and_dval(string):
325        split_string = string.split('(')
326        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
327            factor = 10 ** -len(split_string[0].partition('.')[2])
328        else:
329            factor = 1
330        return float(split_string[0]), float(split_string[1][:-1]) * factor
331
332    loc_priors = []
333    for i_n, i_prior in enumerate(priors):
334        if isinstance(i_prior, Obs):
335            loc_priors.append(i_prior)
336        else:
337            loc_val, loc_dval = extract_val_and_dval(i_prior)
338            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
339
340    output.priors = loc_priors
341
342    if not silent:
343        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
344
345    y_f = [o.value for o in y]
346    dy_f = [o.dvalue for o in y]
347
348    if np.any(np.asarray(dy_f) <= 0.0):
349        raise Exception('No y errors available, run the gamma method first.')
350
351    p_f = [o.value for o in loc_priors]
352    dp_f = [o.dvalue for o in loc_priors]
353
354    if np.any(np.asarray(dp_f) <= 0.0):
355        raise Exception('No prior errors available, run the gamma method first.')
356
357    if 'initial_guess' in kwargs:
358        x0 = kwargs.get('initial_guess')
359        if len(x0) != n_parms:
360            raise Exception('Initial guess does not have the correct length.')
361    else:
362        x0 = p_f
363
364    def chisqfunc(p):
365        model = func(p, x)
366        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
367        return chisq
368
369    if not silent:
370        print('Method: migrad')
371
372    m = iminuit.Minuit(chisqfunc, x0)
373    m.errordef = 1
374    m.print_level = 0
375    if 'tol' in kwargs:
376        m.tol = kwargs.get('tol')
377    else:
378        m.tol = 1e-4
379    m.migrad()
380    params = np.asarray(m.values)
381
382    output.chisquare_by_dof = m.fval / len(x)
383
384    output.method = 'migrad'
385
386    if not silent:
387        print('chisquare/d.o.f.:', output.chisquare_by_dof)
388
389    if not m.fmin.is_valid:
390        raise Exception('The minimization procedure did not converge.')
391
392    hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(params))
393
394    def chisqfunc_compact(d):
395        model = func(d[:n_parms], x)
396        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
397        return chisq
398
399    jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((params, y_f, p_f)))
400
401    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
402
403    result = []
404    for i in range(n_parms):
405        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
406
407    output.fit_parameters = result
408    output.chisquare = chisqfunc(np.asarray(params))
409
410    if kwargs.get('resplot') is True:
411        residual_plot(x, y, func, result)
412
413    if kwargs.get('qqplot') is True:
414        qqplot(x, y, func, result)
415
416    return output
417
418
419def _standard_fit(x, y, func, silent=False, **kwargs):
420
421    output = Fit_result()
422
423    output.fit_function = func
424
425    x = np.asarray(x)
426
427    if x.shape[-1] != len(y):
428        raise Exception('x and y input have to have the same length')
429
430    if len(x.shape) > 2:
431        raise Exception('Unknown format for x values')
432
433    if not callable(func):
434        raise TypeError('func has to be a function.')
435
436    for i in range(25):
437        try:
438            func(np.arange(i), x.T[0])
439        except Exception:
440            pass
441        else:
442            break
443
444    n_parms = i
445
446    if not silent:
447        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
448
449    y_f = [o.value for o in y]
450    dy_f = [o.dvalue for o in y]
451
452    if np.any(np.asarray(dy_f) <= 0.0):
453        raise Exception('No y errors available, run the gamma method first.')
454
455    if 'initial_guess' in kwargs:
456        x0 = kwargs.get('initial_guess')
457        if len(x0) != n_parms:
458            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
459    else:
460        x0 = [0.1] * n_parms
461
462    if kwargs.get('correlated_fit') is True:
463        corr = covariance(y, correlation=True, **kwargs)
464        covdiag = np.diag(1 / np.asarray(dy_f))
465        condn = np.linalg.cond(corr)
466        if condn > 0.1 / np.finfo(float).eps:
467            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
468        if condn > 1 / np.sqrt(np.finfo(float).eps):
469            warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
470        chol = np.linalg.cholesky(corr)
471        chol_inv = np.linalg.inv(chol)
472        chol_inv = np.dot(chol_inv, covdiag)
473
474        def chisqfunc(p):
475            model = func(p, x)
476            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
477            return chisq
478    else:
479        def chisqfunc(p):
480            model = func(p, x)
481            chisq = anp.sum(((y_f - model) / dy_f) ** 2)
482            return chisq
483
484    output.method = kwargs.get('method', 'Levenberg-Marquardt')
485    if not silent:
486        print('Method:', output.method)
487
488    if output.method != 'Levenberg-Marquardt':
489        if output.method == 'migrad':
490            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
491            output.iterations = fit_result.nfev
492        else:
493            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
494            output.iterations = fit_result.nit
495
496        chisquare = fit_result.fun
497
498    else:
499        if kwargs.get('correlated_fit') is True:
500            def chisqfunc_residuals(p):
501                model = func(p, x)
502                chisq = anp.dot(chol_inv, (y_f - model))
503                return chisq
504
505        else:
506            def chisqfunc_residuals(p):
507                model = func(p, x)
508                chisq = ((y_f - model) / dy_f)
509                return chisq
510
511        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
512
513        chisquare = np.sum(fit_result.fun ** 2)
514
515        output.iterations = fit_result.nfev
516
517    if not fit_result.success:
518        raise Exception('The minimization procedure did not converge.')
519
520    if x.shape[-1] - n_parms > 0:
521        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
522    else:
523        output.chisquare_by_dof = float('nan')
524
525    output.message = fit_result.message
526    if not silent:
527        print(fit_result.message)
528        print('chisquare/d.o.f.:', output.chisquare_by_dof)
529
530    if kwargs.get('expected_chisquare') is True:
531        if kwargs.get('correlated_fit') is not True:
532            W = np.diag(1 / np.asarray(dy_f))
533            cov = covariance(y)
534            A = W @ jacobian(func)(fit_result.x, x)
535            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
536            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
537            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
538            if not silent:
539                print('chisquare/expected_chisquare:',
540                      output.chisquare_by_expected_chisquare)
541
542    fitp = fit_result.x
543    try:
544        hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(fitp))
545    except TypeError:
546        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
547
548    if kwargs.get('correlated_fit') is True:
549        def chisqfunc_compact(d):
550            model = func(d[:n_parms], x)
551            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
552            return chisq
553
554    else:
555        def chisqfunc_compact(d):
556            model = func(d[:n_parms], x)
557            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
558            return chisq
559
560    jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((fitp, y_f)))
561
562    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
563
564    result = []
565    for i in range(n_parms):
566        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
567
568    output.fit_parameters = result
569
570    output.chisquare = chisqfunc(fit_result.x)
571    output.dof = x.shape[-1] - n_parms
572    output.p_value = 1 - chi2.cdf(output.chisquare, output.dof)
573
574    if kwargs.get('resplot') is True:
575        residual_plot(x, y, func, result)
576
577    if kwargs.get('qqplot') is True:
578        qqplot(x, y, func, result)
579
580    return output
581
582
583def fit_lin(x, y, **kwargs):
584    """Performs a linear fit to y = n + m * x and returns two Obs n, m.
585
586    Parameters
587    ----------
588    x : list
589        Can either be a list of floats in which case no xerror is assumed, or
590        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
591    y : list
592        List of Obs, the dvalues of the Obs are used as yerror for the fit.
593    """
594
595    def f(a, x):
596        y = a[0] + a[1] * x
597        return y
598
599    if all(isinstance(n, Obs) for n in x):
600        out = total_least_squares(x, y, f, **kwargs)
601        return out.fit_parameters
602    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
603        out = least_squares(x, y, f, **kwargs)
604        return out.fit_parameters
605    else:
606        raise Exception('Unsupported types for x')
607
608
609def qqplot(x, o_y, func, p):
610    """Generates a quantile-quantile plot of the fit result which can be used to
611       check if the residuals of the fit are Gaussian distributed.
612    """
613
614    residuals = []
615    for i_x, i_y in zip(x, o_y):
616        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
617    residuals = sorted(residuals)
618    my_y = [o.value for o in residuals]
619    probplot = scipy.stats.probplot(my_y)
620    my_x = probplot[0][0]
621    plt.figure(figsize=(8, 8 / 1.618))
622    plt.errorbar(my_x, my_y, fmt='o')
623    fit_start = my_x[0]
624    fit_stop = my_x[-1]
625    samples = np.arange(fit_start, fit_stop, 0.01)
626    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
627    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')
628
629    plt.xlabel('Theoretical quantiles')
630    plt.ylabel('Ordered Values')
631    plt.legend()
632    plt.draw()
633
634
635def residual_plot(x, y, func, fit_res):
636    """ Generates a plot which compares the fit to the data and displays the corresponding residuals"""
637    sorted_x = sorted(x)
638    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
639    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
640    x_samples = np.arange(xstart, xstop + 0.01, 0.01)
641
642    plt.figure(figsize=(8, 8 / 1.618))
643    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
644    ax0 = plt.subplot(gs[0])
645    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
646    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
647    ax0.set_xticklabels([])
648    ax0.set_xlim([xstart, xstop])
649    ax0.set_xticklabels([])
650    ax0.legend()
651
652    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
653    ax1 = plt.subplot(gs[1])
654    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
655    ax1.tick_params(direction='out')
656    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
657    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
658    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
659    ax1.set_xlim([xstart, xstop])
660    ax1.set_ylabel('Residuals')
661    plt.subplots_adjust(wspace=None, hspace=None)
662    plt.draw()
663
664
665def error_band(x, func, beta):
666    """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
667    cov = covariance(beta)
668    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
669        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)
670
671    deriv = []
672    for i, item in enumerate(x):
673        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
674
675    err = []
676    for i, item in enumerate(x):
677        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
678    err = np.array(err)
679
680    return err
681
682
683def ks_test(objects=None):
684    """Performs a Kolmogorov–Smirnov test for the p-values of all fit object.
685
686    Parameters
687    ----------
688    objects : list
689        List of fit results to include in the analysis (optional).
690    """
691
692    if objects is None:
693        obs_list = []
694        for obj in gc.get_objects():
695            if isinstance(obj, Fit_result):
696                obs_list.append(obj)
697    else:
698        obs_list = objects
699
700    p_values = [o.p_value for o in obs_list]
701
702    bins = len(p_values)
703    x = np.arange(0, 1.001, 0.001)
704    plt.plot(x, x, 'k', zorder=1)
705    plt.xlim(0, 1)
706    plt.ylim(0, 1)
707    plt.xlabel('p-value')
708    plt.ylabel('Cumulative probability')
709    plt.title(str(bins) + ' p-values')
710
711    n = np.arange(1, bins + 1) / np.float64(bins)
712    Xs = np.sort(p_values)
713    plt.step(Xs, n)
714    diffs = n - Xs
715    loc_max_diff = np.argmax(np.abs(diffs))
716    loc = Xs[loc_max_diff]
717    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
718    plt.draw()
719
720    print(scipy.stats.kstest(p_values, 'uniform'))
#   class Fit_result(collections.abc.Sequence):

Represents fit results.

Attributes
  • fit_parameters (list): results for the individual fit parameters, also accessible via indices.
#   Fit_result()
#   def gamma_method(self):

Apply the gamma method to all fit parameters

Inherited Members
  • collections.abc.Sequence: index, count
#   def least_squares(x, y, func, priors=None, silent=False, **kwargs):

Performs a non-linear fit to y = func(x).

Parameters
  • x (list): list of floats.
  • y (list): list of Obs.
  • func (object): fit function, has to be of the form

    import autograd.numpy as anp
    
    def func(a, x):
        return a[0] + a[1] * x + a[2] * anp.sinh(x)
    

    For multiple x values func can be of the form

    def func(a, x):
        (x1, x2) = x
        return a[0] * x1 ** 2 + a[1] * x2
    

    It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

  • priors (list, optional): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4)
  • silent (bool, optional): If True, all output to the console is omitted (default False).
  • initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
  • method (str, optional): can be used to choose an alternative method for the minimization of chisquare. The possible methods are the ones which can be used for scipy.optimize.minimize and migrad of iminuit. If no method is specified, Levenberg-Marquardt is used. Reliable alternatives are migrad, Powell and Nelder-Mead.
  • correlated_fit (bool): If True, use the full inverse covariance matrix in the definition of the chisquare cost function. For details about how the covariance matrix is estimated see pyerrors.obs.covariance. In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). At the moment this option only works for priors=None and when no method is given.
  • expected_chisquare (bool): If True estimates the expected chisquare which is corrected by effects caused by correlated input data (default False).
  • resplot (bool): If True, a plot which displays fit, data and residuals is generated (default False).
  • qqplot (bool): If True, a quantile-quantile plot of the fit result is generated (default False).
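
The following is a minimal usage sketch, not part of the original docstring: it fits a straight line to synthetic, uncorrelated pseudo-data. The ensemble name 'ens' and all numerical values are purely illustrative; pyerrors is assumed to be importable as `pe`.

```python
import numpy as np
import pyerrors as pe


def func(a, x):
    # a purely polynomial model, so plain arithmetic suffices; non-linear terms
    # would have to use autograd.numpy (anp) as explained above
    return a[0] + a[1] * x


# synthetic pseudo-data: y_i = 1.0 + 0.5 * x_i plus Gaussian noise
rng = np.random.default_rng(42)
x = np.arange(1.0, 6.0)
y = [pe.Obs([rng.normal(1.0 + 0.5 * xi, 0.1, 1000)], ['ens']) for xi in x]
for o in y:
    o.gamma_method()          # y errors are required before fitting

fit = pe.fits.least_squares(x, y, func)
fit.gamma_method()            # propagate errors to the fit parameters
print(fit)                    # chi^2/d.o.f., p-value and parameter table
print(fit[0], fit[1])         # individual parameters are Obs, accessible via indexing
```

The same call accepts the keyword arguments listed above, e.g. `least_squares(x, y, func, correlated_fit=True)` or `resplot=True`.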
#   def total_least_squares(x, y, func, silent=False, **kwargs):

Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.

Parameters
  • x (list): list of Obs, or a tuple of lists of Obs
  • y (list): list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
  • func (object): func has to be of the form

    import autograd.numpy as anp
    
    def func(a, x):
        return a[0] + a[1] * x + a[2] * anp.sinh(x)
    

    For multiple x values func can be of the form

    def func(a, x):
        (x1, x2) = x
        return a[0] * x1 ** 2 + a[1] * x2
    

    It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

  • silent (bool, optional): If True, all output to the console is omitted (default False).
  • initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
  • expected_chisquare (bool): If True, prints the expected chisquare, which is corrected for effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False).
Notes

Based on the orthogonal distance regression module of scipy
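
A minimal usage sketch along the same lines as the least_squares example above, not part of the original docstring: here both x and y carry errors and are therefore passed as Obs. Names and values are illustrative.

```python
import numpy as np
import pyerrors as pe


def func(a, x):
    return a[0] + a[1] * x


rng = np.random.default_rng(7)
x = [pe.Obs([rng.normal(xi, 0.05, 1000)], ['ens']) for xi in range(1, 6)]
y = [pe.Obs([rng.normal(1.0 + 0.5 * xi, 0.1, 1000)], ['ens']) for xi in range(1, 6)]
for o in x + y:
    o.gamma_method()          # x and y errors are both required

fit = pe.fits.total_least_squares(x, y, func)
fit.gamma_method()
print(fit)                    # residual variance and parameter table
```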

#   def fit_lin(x, y, **kwargs):

Performs a linear fit to y = n + m * x and returns two Obs n, m.

Parameters
  • x (list): Can either be a list of floats in which case no xerror is assumed, or a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
  • y (list): List of Obs, the dvalues of the Obs are used as yerror for the fit.
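
A short sketch, illustrative only: `y` is assumed to be a list of Obs with errors (as in the least_squares example above) and `x_obs` a matching list of Obs carrying x errors (as in the total_least_squares example).

```python
import pyerrors as pe

# x given as plain floats: only the y errors enter (least_squares branch)
n, m = pe.fits.fit_lin([1.0, 2.0, 3.0, 4.0, 5.0], y)

# x given as Obs: x and y errors both enter (total_least_squares branch)
n, m = pe.fits.fit_lin(x_obs, y)

for o in (n, m):
    o.gamma_method()
print(n, m)                   # intercept and slope as Obs
```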
#   def qqplot(x, o_y, func, p):

Generates a quantile-quantile plot of the fit result, which can be used to check whether the residuals of the fit are Gaussian distributed.
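
The same plot can be requested directly from least_squares via the `qqplot=True` keyword. A direct call might look like the following sketch, reusing `x`, `y`, `func` and `fit` from the least_squares example above:

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.qqplot(x, y, func, fit.fit_parameters)   # ordered residuals vs. theoretical quantiles
plt.show()                                       # the module itself only calls plt.draw()
```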

#   def residual_plot(x, y, func, fit_res):

Generates a plot which compares the fit to the data and displays the corresponding residuals
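
As with qqplot, this figure is also available through the `resplot=True` keyword of least_squares. A direct call, again reusing the objects from the least_squares example above, could look like this:

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.residual_plot(x, y, func, fit.fit_parameters)   # data and fit on top, residuals below
plt.show()
```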

#   def error_band(x, func, beta):

Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta.
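
A sketch of how the band might be evaluated and drawn, reusing `func` and `fit` from the least_squares example above; `gamma_method` must have been applied to the fit parameters so that their covariance is available:

```python
import numpy as np
import matplotlib.pyplot as plt
import pyerrors as pe

x_samples = np.linspace(1.0, 5.0, 100)
band = pe.fits.error_band(x_samples, func, fit.fit_parameters)
central = func([o.value for o in fit.fit_parameters], x_samples)
plt.fill_between(x_samples, central - band, central + band, alpha=0.3)
plt.plot(x_samples, central)
plt.show()
```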

#   def ks_test(objects=None):

Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.

Parameters
  • objects (list): List of fit results to include in the analysis (optional).
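
A short sketch, names illustrative: `fits` stands for a hypothetical list of Fit_result objects, e.g. collected from several least_squares calls on different data sets (each result then carries a p_value).

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.ks_test(fits)   # prints scipy.stats.kstest(p_values, 'uniform') and draws the empirical CDF
plt.show()

# called without arguments, all Fit_result objects reachable via the garbage
# collector are included in the test
pe.fits.ks_test()
```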