pyerrors.fits
1import gc 2from collections.abc import Sequence 3import warnings 4import numpy as np 5import autograd.numpy as anp 6import scipy.optimize 7import scipy.stats 8import matplotlib.pyplot as plt 9from matplotlib import gridspec 10from scipy.odr import ODR, Model, RealData 11import iminuit 12from autograd import jacobian as auto_jacobian 13from autograd import hessian as auto_hessian 14from autograd import elementwise_grad as egrad 15from numdifftools import Jacobian as num_jacobian 16from numdifftools import Hessian as num_hessian 17from .obs import Obs, derived_observable, covariance, cov_Obs 18 19 20class Fit_result(Sequence): 21 """Represents fit results. 22 23 Attributes 24 ---------- 25 fit_parameters : list 26 results for the individual fit parameters, 27 also accessible via indices. 28 chisquare_by_dof : float 29 reduced chisquare. 30 p_value : float 31 p-value of the fit 32 t2_p_value : float 33 Hotelling t-squared p-value for correlated fits. 34 """ 35 36 def __init__(self): 37 self.fit_parameters = None 38 39 def __getitem__(self, idx): 40 return self.fit_parameters[idx] 41 42 def __len__(self): 43 return len(self.fit_parameters) 44 45 def gamma_method(self, **kwargs): 46 """Apply the gamma method to all fit parameters""" 47 [o.gamma_method(**kwargs) for o in self.fit_parameters] 48 49 gm = gamma_method 50 51 def __str__(self): 52 my_str = 'Goodness of fit:\n' 53 if hasattr(self, 'chisquare_by_dof'): 54 my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n' 55 elif hasattr(self, 'residual_variance'): 56 my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n' 57 if hasattr(self, 'chisquare_by_expected_chisquare'): 58 my_str += '\u03C7\u00b2/\u03C7\u00b2exp = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n' 59 if hasattr(self, 'p_value'): 60 my_str += 'p-value = ' + f'{self.p_value:2.4f}' + '\n' 61 if hasattr(self, 't2_p_value'): 62 my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n' 63 my_str += 'Fit parameters:\n' 64 for i_par, par in enumerate(self.fit_parameters): 65 my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n' 66 return my_str 67 68 def __repr__(self): 69 m = max(map(len, list(self.__dict__.keys()))) + 1 70 return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())]) 71 72 73def least_squares(x, y, func, priors=None, silent=False, **kwargs): 74 r'''Performs a non-linear fit to y = func(x). 75 76 Parameters 77 ---------- 78 x : list 79 list of floats. 80 y : list 81 list of Obs. 82 func : object 83 fit function, has to be of the form 84 85 ```python 86 import autograd.numpy as anp 87 88 def func(a, x): 89 return a[0] + a[1] * x + a[2] * anp.sinh(x) 90 ``` 91 92 For multiple x values func can be of the form 93 94 ```python 95 def func(a, x): 96 (x1, x2) = x 97 return a[0] * x1 ** 2 + a[1] * x2 98 ``` 99 100 It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation 101 will not work. 102 priors : list, optional 103 priors has to be a list with an entry for every parameter in the fit. The entries can either be 104 Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 105 0.548(23), 500(40) or 0.5(0.4) 106 silent : bool, optional 107 If true all output to the console is omitted (default False). 108 initial_guess : list 109 can provide an initial guess for the input parameters. Relevant for 110 non-linear fits with many parameters. 
In case of correlated fits the guess is used to perform 111 an uncorrelated fit which then serves as guess for the correlated fit. 112 method : str, optional 113 can be used to choose an alternative method for the minimization of chisquare. 114 The possible methods are the ones which can be used for scipy.optimize.minimize and 115 migrad of iminuit. If no method is specified, Levenberg-Marquard is used. 116 Reliable alternatives are migrad, Powell and Nelder-Mead. 117 correlated_fit : bool 118 If True, use the full inverse covariance matrix in the definition of the chisquare cost function. 119 For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`. 120 In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). 121 This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). 122 At the moment this option only works for `prior==None` and when no `method` is given. 123 expected_chisquare : bool 124 If True estimates the expected chisquare which is 125 corrected by effects caused by correlated input data (default False). 126 resplot : bool 127 If True, a plot which displays fit, data and residuals is generated (default False). 128 qqplot : bool 129 If True, a quantile-quantile plot of the fit result is generated (default False). 130 num_grad : bool 131 Use numerical differentation instead of automatic differentiation to perform the error propagation (default False). 132 ''' 133 if priors is not None: 134 return _prior_fit(x, y, func, priors, silent=silent, **kwargs) 135 else: 136 return _standard_fit(x, y, func, silent=silent, **kwargs) 137 138 139def total_least_squares(x, y, func, silent=False, **kwargs): 140 r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters. 141 142 Parameters 143 ---------- 144 x : list 145 list of Obs, or a tuple of lists of Obs 146 y : list 147 list of Obs. The dvalues of the Obs are used as x- and yerror for the fit. 148 func : object 149 func has to be of the form 150 151 ```python 152 import autograd.numpy as anp 153 154 def func(a, x): 155 return a[0] + a[1] * x + a[2] * anp.sinh(x) 156 ``` 157 158 For multiple x values func can be of the form 159 160 ```python 161 def func(a, x): 162 (x1, x2) = x 163 return a[0] * x1 ** 2 + a[1] * x2 164 ``` 165 166 It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation 167 will not work. 168 silent : bool, optional 169 If true all output to the console is omitted (default False). 170 initial_guess : list 171 can provide an initial guess for the input parameters. Relevant for non-linear 172 fits with many parameters. 173 expected_chisquare : bool 174 If true prints the expected chisquare which is 175 corrected by effects caused by correlated input data. 176 This can take a while as the full correlation matrix 177 has to be calculated (default False). 178 num_grad : bool 179 Use numerical differentation instead of automatic differentiation to perform the error propagation (default False). 
180 181 Notes 182 ----- 183 Based on the orthogonal distance regression module of scipy 184 ''' 185 186 output = Fit_result() 187 188 output.fit_function = func 189 190 x = np.array(x) 191 192 x_shape = x.shape 193 194 if kwargs.get('num_grad') is True: 195 jacobian = num_jacobian 196 hessian = num_hessian 197 else: 198 jacobian = auto_jacobian 199 hessian = auto_hessian 200 201 if not callable(func): 202 raise TypeError('func has to be a function.') 203 204 for i in range(42): 205 try: 206 func(np.arange(i), x.T[0]) 207 except TypeError: 208 continue 209 except IndexError: 210 continue 211 else: 212 break 213 else: 214 raise RuntimeError("Fit function is not valid.") 215 216 n_parms = i 217 if not silent: 218 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 219 220 x_f = np.vectorize(lambda o: o.value)(x) 221 dx_f = np.vectorize(lambda o: o.dvalue)(x) 222 y_f = np.array([o.value for o in y]) 223 dy_f = np.array([o.dvalue for o in y]) 224 225 if np.any(np.asarray(dx_f) <= 0.0): 226 raise Exception('No x errors available, run the gamma method first.') 227 228 if np.any(np.asarray(dy_f) <= 0.0): 229 raise Exception('No y errors available, run the gamma method first.') 230 231 if 'initial_guess' in kwargs: 232 x0 = kwargs.get('initial_guess') 233 if len(x0) != n_parms: 234 raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms)) 235 else: 236 x0 = [1] * n_parms 237 238 data = RealData(x_f, y_f, sx=dx_f, sy=dy_f) 239 model = Model(func) 240 odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps) 241 odr.set_job(fit_type=0, deriv=1) 242 out = odr.run() 243 244 output.residual_variance = out.res_var 245 246 output.method = 'ODR' 247 248 output.message = out.stopreason 249 250 output.xplus = out.xplus 251 252 if not silent: 253 print('Method: ODR') 254 print(*out.stopreason) 255 print('Residual variance:', output.residual_variance) 256 257 if out.info > 3: 258 raise Exception('The minimization procedure did not converge.') 259 260 m = x_f.size 261 262 def odr_chisquare(p): 263 model = func(p[:n_parms], p[n_parms:].reshape(x_shape)) 264 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2) 265 return chisq 266 267 if kwargs.get('expected_chisquare') is True: 268 W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel())))) 269 270 if kwargs.get('covariance') is not None: 271 cov = kwargs.get('covariance') 272 else: 273 cov = covariance(np.concatenate((y, x.ravel()))) 274 275 number_of_x_parameters = int(m / x_f.shape[-1]) 276 277 old_jac = jacobian(func)(out.beta, out.xplus) 278 fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0))) 279 fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0]))) 280 new_jac = np.concatenate((fused_row1, fused_row2), axis=1) 281 282 A = W @ new_jac 283 P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T 284 expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W) 285 if expected_chisquare <= 0.0: 286 warnings.warn("Negative expected_chisquare.", RuntimeWarning) 287 expected_chisquare = np.abs(expected_chisquare) 288 output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare 289 if not silent: 290 print('chisquare/expected_chisquare:', 291 
output.chisquare_by_expected_chisquare) 292 293 fitp = out.beta 294 try: 295 hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel()))) 296 except TypeError: 297 raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None 298 299 def odr_chisquare_compact_x(d): 300 model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) 301 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) 302 return chisq 303 304 jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel()))) 305 306 # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv 307 try: 308 deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:]) 309 except np.linalg.LinAlgError: 310 raise Exception("Cannot invert hessian matrix.") 311 312 def odr_chisquare_compact_y(d): 313 model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) 314 chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) 315 return chisq 316 317 jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f))) 318 319 # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv 320 try: 321 deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:]) 322 except np.linalg.LinAlgError: 323 raise Exception("Cannot invert hessian matrix.") 324 325 result = [] 326 for i in range(n_parms): 327 result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i]))) 328 329 output.fit_parameters = result 330 331 output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) 332 output.dof = x.shape[-1] - n_parms 333 output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof) 334 335 return output 336 337 338def _prior_fit(x, y, func, priors, silent=False, **kwargs): 339 output = Fit_result() 340 341 output.fit_function = func 342 343 x = np.asarray(x) 344 345 if kwargs.get('num_grad') is True: 346 hessian = num_hessian 347 else: 348 hessian = auto_hessian 349 350 if not callable(func): 351 raise TypeError('func has to be a function.') 352 353 for i in range(100): 354 try: 355 func(np.arange(i), 0) 356 except TypeError: 357 continue 358 except IndexError: 359 continue 360 else: 361 break 362 else: 363 raise RuntimeError("Fit function is not valid.") 364 365 n_parms = i 366 367 if n_parms != len(priors): 368 raise Exception('Priors does not have the correct length.') 369 370 def extract_val_and_dval(string): 371 split_string = string.split('(') 372 if '.' in split_string[0] and '.' 
not in split_string[1][:-1]: 373 factor = 10 ** -len(split_string[0].partition('.')[2]) 374 else: 375 factor = 1 376 return float(split_string[0]), float(split_string[1][:-1]) * factor 377 378 loc_priors = [] 379 for i_n, i_prior in enumerate(priors): 380 if isinstance(i_prior, Obs): 381 loc_priors.append(i_prior) 382 else: 383 loc_val, loc_dval = extract_val_and_dval(i_prior) 384 loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}")) 385 386 output.priors = loc_priors 387 388 if not silent: 389 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 390 391 y_f = [o.value for o in y] 392 dy_f = [o.dvalue for o in y] 393 394 if np.any(np.asarray(dy_f) <= 0.0): 395 raise Exception('No y errors available, run the gamma method first.') 396 397 p_f = [o.value for o in loc_priors] 398 dp_f = [o.dvalue for o in loc_priors] 399 400 if np.any(np.asarray(dp_f) <= 0.0): 401 raise Exception('No prior errors available, run the gamma method first.') 402 403 if 'initial_guess' in kwargs: 404 x0 = kwargs.get('initial_guess') 405 if len(x0) != n_parms: 406 raise Exception('Initial guess does not have the correct length.') 407 else: 408 x0 = p_f 409 410 def chisqfunc(p): 411 model = func(p, x) 412 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2) 413 return chisq 414 415 if not silent: 416 print('Method: migrad') 417 418 m = iminuit.Minuit(chisqfunc, x0) 419 m.errordef = 1 420 m.print_level = 0 421 if 'tol' in kwargs: 422 m.tol = kwargs.get('tol') 423 else: 424 m.tol = 1e-4 425 m.migrad() 426 params = np.asarray(m.values) 427 428 output.chisquare_by_dof = m.fval / len(x) 429 430 output.method = 'migrad' 431 432 if not silent: 433 print('chisquare/d.o.f.:', output.chisquare_by_dof) 434 435 if not m.fmin.is_valid: 436 raise Exception('The minimization procedure did not converge.') 437 438 hess = hessian(chisqfunc)(params) 439 hess_inv = np.linalg.pinv(hess) 440 441 def chisqfunc_compact(d): 442 model = func(d[:n_parms], x) 443 chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2) 444 return chisq 445 446 jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f))) 447 448 deriv = -hess_inv @ jac_jac[:n_parms, n_parms:] 449 450 result = [] 451 for i in range(n_parms): 452 result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i]))) 453 454 output.fit_parameters = result 455 output.chisquare = chisqfunc(np.asarray(params)) 456 457 if kwargs.get('resplot') is True: 458 residual_plot(x, y, func, result) 459 460 if kwargs.get('qqplot') is True: 461 qqplot(x, y, func, result) 462 463 return output 464 465 466def _standard_fit(x, y, func, silent=False, **kwargs): 467 468 output = Fit_result() 469 470 output.fit_function = func 471 472 x = np.asarray(x) 473 474 if kwargs.get('num_grad') is True: 475 jacobian = num_jacobian 476 hessian = num_hessian 477 else: 478 jacobian = auto_jacobian 479 hessian = auto_hessian 480 481 if x.shape[-1] != len(y): 482 raise Exception('x and y input have to have the same length') 483 484 if len(x.shape) > 2: 485 raise Exception('Unknown format for x values') 486 487 if not callable(func): 488 raise TypeError('func has to be a function.') 489 490 for i in range(42): 491 try: 492 func(np.arange(i), x.T[0]) 493 except TypeError: 494 continue 495 except IndexError: 496 
continue 497 else: 498 break 499 else: 500 raise RuntimeError("Fit function is not valid.") 501 502 n_parms = i 503 504 if not silent: 505 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 506 507 y_f = [o.value for o in y] 508 dy_f = [o.dvalue for o in y] 509 510 if np.any(np.asarray(dy_f) <= 0.0): 511 raise Exception('No y errors available, run the gamma method first.') 512 513 if 'initial_guess' in kwargs: 514 x0 = kwargs.get('initial_guess') 515 if len(x0) != n_parms: 516 raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms)) 517 else: 518 x0 = [0.1] * n_parms 519 520 if kwargs.get('correlated_fit') is True: 521 corr = covariance(y, correlation=True, **kwargs) 522 covdiag = np.diag(1 / np.asarray(dy_f)) 523 condn = np.linalg.cond(corr) 524 if condn > 0.1 / np.finfo(float).eps: 525 raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})") 526 if condn > 1e13: 527 warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning) 528 chol = np.linalg.cholesky(corr) 529 chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True) 530 531 def chisqfunc_corr(p): 532 model = func(p, x) 533 chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2) 534 return chisq 535 536 def chisqfunc(p): 537 model = func(p, x) 538 chisq = anp.sum(((y_f - model) / dy_f) ** 2) 539 return chisq 540 541 output.method = kwargs.get('method', 'Levenberg-Marquardt') 542 if not silent: 543 print('Method:', output.method) 544 545 if output.method != 'Levenberg-Marquardt': 546 if output.method == 'migrad': 547 fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4) # Stopping criterion 0.002 * tol * errordef 548 if kwargs.get('correlated_fit') is True: 549 fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4) # Stopping criterion 0.002 * tol * errordef 550 output.iterations = fit_result.nfev 551 else: 552 fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12) 553 if kwargs.get('correlated_fit') is True: 554 fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12) 555 output.iterations = fit_result.nit 556 557 chisquare = fit_result.fun 558 559 else: 560 if kwargs.get('correlated_fit') is True: 561 def chisqfunc_residuals_corr(p): 562 model = func(p, x) 563 chisq = anp.dot(chol_inv, (y_f - model)) 564 return chisq 565 566 def chisqfunc_residuals(p): 567 model = func(p, x) 568 chisq = ((y_f - model) / dy_f) 569 return chisq 570 571 fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15) 572 if kwargs.get('correlated_fit') is True: 573 fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15) 574 575 chisquare = np.sum(fit_result.fun ** 2) 576 if kwargs.get('correlated_fit') is True: 577 assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14) 578 else: 579 assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14) 580 581 output.iterations = fit_result.nfev 582 583 if not fit_result.success: 584 raise Exception('The minimization procedure did not converge.') 585 586 if x.shape[-1] - n_parms > 0: 587 output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms) 588 else: 589 output.chisquare_by_dof = float('nan') 590 591 output.message = fit_result.message 592 if not silent: 593 print(fit_result.message) 594 
print('chisquare/d.o.f.:', output.chisquare_by_dof) 595 596 if kwargs.get('expected_chisquare') is True: 597 if kwargs.get('correlated_fit') is not True: 598 W = np.diag(1 / np.asarray(dy_f)) 599 cov = covariance(y) 600 A = W @ jacobian(func)(fit_result.x, x) 601 P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T 602 expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W) 603 output.chisquare_by_expected_chisquare = chisquare / expected_chisquare 604 if not silent: 605 print('chisquare/expected_chisquare:', 606 output.chisquare_by_expected_chisquare) 607 608 fitp = fit_result.x 609 try: 610 if kwargs.get('correlated_fit') is True: 611 hess = hessian(chisqfunc_corr)(fitp) 612 else: 613 hess = hessian(chisqfunc)(fitp) 614 except TypeError: 615 raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None 616 617 if kwargs.get('correlated_fit') is True: 618 def chisqfunc_compact(d): 619 model = func(d[:n_parms], x) 620 chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2) 621 return chisq 622 623 else: 624 def chisqfunc_compact(d): 625 model = func(d[:n_parms], x) 626 chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2) 627 return chisq 628 629 jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f))) 630 631 # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv 632 try: 633 deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:]) 634 except np.linalg.LinAlgError: 635 raise Exception("Cannot invert hessian matrix.") 636 637 result = [] 638 for i in range(n_parms): 639 result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i]))) 640 641 output.fit_parameters = result 642 643 output.chisquare = chisquare 644 output.dof = x.shape[-1] - n_parms 645 output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof) 646 # Hotelling t-squared p-value for correlated fits. 647 if kwargs.get('correlated_fit') is True: 648 n_cov = np.min(np.vectorize(lambda x: x.N)(y)) 649 output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare, 650 output.dof, n_cov - output.dof) 651 652 if kwargs.get('resplot') is True: 653 residual_plot(x, y, func, result) 654 655 if kwargs.get('qqplot') is True: 656 qqplot(x, y, func, result) 657 658 return output 659 660 661def fit_lin(x, y, **kwargs): 662 """Performs a linear fit to y = n + m * x and returns two Obs n, m. 663 664 Parameters 665 ---------- 666 x : list 667 Can either be a list of floats in which case no xerror is assumed, or 668 a list of Obs, where the dvalues of the Obs are used as xerror for the fit. 669 y : list 670 List of Obs, the dvalues of the Obs are used as yerror for the fit. 671 """ 672 673 def f(a, x): 674 y = a[0] + a[1] * x 675 return y 676 677 if all(isinstance(n, Obs) for n in x): 678 out = total_least_squares(x, y, f, **kwargs) 679 return out.fit_parameters 680 elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray): 681 out = least_squares(x, y, f, **kwargs) 682 return out.fit_parameters 683 else: 684 raise Exception('Unsupported types for x') 685 686 687def qqplot(x, o_y, func, p): 688 """Generates a quantile-quantile plot of the fit result which can be used to 689 check if the residuals of the fit are gaussian distributed. 
690 """ 691 692 residuals = [] 693 for i_x, i_y in zip(x, o_y): 694 residuals.append((i_y - func(p, i_x)) / i_y.dvalue) 695 residuals = sorted(residuals) 696 my_y = [o.value for o in residuals] 697 probplot = scipy.stats.probplot(my_y) 698 my_x = probplot[0][0] 699 plt.figure(figsize=(8, 8 / 1.618)) 700 plt.errorbar(my_x, my_y, fmt='o') 701 fit_start = my_x[0] 702 fit_stop = my_x[-1] 703 samples = np.arange(fit_start, fit_stop, 0.01) 704 plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution') 705 plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-') 706 707 plt.xlabel('Theoretical quantiles') 708 plt.ylabel('Ordered Values') 709 plt.legend() 710 plt.draw() 711 712 713def residual_plot(x, y, func, fit_res): 714 """ Generates a plot which compares the fit to the data and displays the corresponding residuals""" 715 sorted_x = sorted(x) 716 xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0]) 717 xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2]) 718 x_samples = np.arange(xstart, xstop + 0.01, 0.01) 719 720 plt.figure(figsize=(8, 8 / 1.618)) 721 gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0) 722 ax0 = plt.subplot(gs[0]) 723 ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data') 724 ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0) 725 ax0.set_xticklabels([]) 726 ax0.set_xlim([xstart, xstop]) 727 ax0.set_xticklabels([]) 728 ax0.legend() 729 730 residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y]) 731 ax1 = plt.subplot(gs[1]) 732 ax1.plot(x, residuals, 'ko', ls='none', markersize=5) 733 ax1.tick_params(direction='out') 734 ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True) 735 ax1.axhline(y=0.0, ls='--', color='k', marker=" ") 736 ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k') 737 ax1.set_xlim([xstart, xstop]) 738 ax1.set_ylabel('Residuals') 739 plt.subplots_adjust(wspace=None, hspace=None) 740 plt.draw() 741 742 743def error_band(x, func, beta): 744 """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta.""" 745 cov = covariance(beta) 746 if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps): 747 warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning) 748 749 deriv = [] 750 for i, item in enumerate(x): 751 deriv.append(np.array(egrad(func)([o.value for o in beta], item))) 752 753 err = [] 754 for i, item in enumerate(x): 755 err.append(np.sqrt(deriv[i] @ cov @ deriv[i])) 756 err = np.array(err) 757 758 return err 759 760 761def ks_test(objects=None): 762 """Performs a Kolmogorov–Smirnov test for the p-values of all fit object. 763 764 Parameters 765 ---------- 766 objects : list 767 List of fit results to include in the analysis (optional). 
768 """ 769 770 if objects is None: 771 obs_list = [] 772 for obj in gc.get_objects(): 773 if isinstance(obj, Fit_result): 774 obs_list.append(obj) 775 else: 776 obs_list = objects 777 778 p_values = [o.p_value for o in obs_list] 779 780 bins = len(p_values) 781 x = np.arange(0, 1.001, 0.001) 782 plt.plot(x, x, 'k', zorder=1) 783 plt.xlim(0, 1) 784 plt.ylim(0, 1) 785 plt.xlabel('p-value') 786 plt.ylabel('Cumulative probability') 787 plt.title(str(bins) + ' p-values') 788 789 n = np.arange(1, bins + 1) / np.float64(bins) 790 Xs = np.sort(p_values) 791 plt.step(Xs, n) 792 diffs = n - Xs 793 loc_max_diff = np.argmax(np.abs(diffs)) 794 loc = Xs[loc_max_diff] 795 plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0)) 796 plt.draw() 797 798 print(scipy.stats.kstest(p_values, 'uniform'))
class Fit_result(Sequence):
Represents fit results.
Attributes
- fit_parameters (list): results for the individual fit parameters, also accessible via indices.
- chisquare_by_dof (float): reduced chisquare.
- p_value (float): p-value of the fit
- t2_p_value (float): Hotelling t-squared p-value for correlated fits.
def gamma_method(self, **kwargs):
Apply the gamma method to all fit parameters
gm = gamma_method
Inherited Members
- collections.abc.Sequence: index, count
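A Fit_result is typically consumed like this (a minimal sketch, assuming `fit_res` was returned by one of the fitting routines documented below):

```python
fit_res.gamma_method()          # error propagation for all fit parameters (alias: fit_res.gm())
print(fit_res)                  # goodness-of-fit summary and parameter table
first_parameter = fit_res[0]    # fit parameters are accessible via indices
all_parameters = list(fit_res)  # Fit_result behaves like a Sequence
```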
def least_squares(x, y, func, priors=None, silent=False, **kwargs):
Performs a non-linear fit to y = func(x).
Parameters
- x (list): list of floats.
- y (list): list of Obs.
- func (object): fit function, has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```

For multiple x values func can be of the form

```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```

It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

- priors (list, optional): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4).
- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters. In case of correlated fits the guess is used to perform an uncorrelated fit which then serves as a guess for the correlated fit.
- method (str, optional): can be used to choose an alternative method for the minimization of chisquare. The possible methods are the ones which can be used for scipy.optimize.minimize and migrad of iminuit. If no method is specified, Levenberg-Marquardt is used. Reliable alternatives are migrad, Powell and Nelder-Mead.
- correlated_fit (bool): If True, use the full inverse covariance matrix in the definition of the chisquare cost function. For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`. In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). At the moment this option only works for `priors=None` and when no `method` is given.
- expected_chisquare (bool): If True estimates the expected chisquare which is corrected by effects caused by correlated input data (default False).
- resplot (bool): If True, a plot which displays fit, data and residuals is generated (default False).
- qqplot (bool): If True, a quantile-quantile plot of the fit result is generated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
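A usage sketch with toy data (not taken from the module itself; `pyerrors.pseudo_Obs` is assumed to be available for generating test observables, real analyses would use Obs built from Monte Carlo data):

```python
import numpy as np
import autograd.numpy as anp
from pyerrors import pseudo_Obs
from pyerrors.fits import least_squares

# Toy data: six points following an exponential decay.
x = np.arange(1, 7)
y = [pseudo_Obs(2.0 * np.exp(-0.3 * xi) + 0.1, 0.02, 'ens1') for xi in x]
for o in y:
    o.gamma_method()  # y errors are required before fitting

def func(a, x):
    return a[0] * anp.exp(-a[1] * x) + a[2]  # autograd.numpy keeps the function differentiable

fit_res = least_squares(x, y, func, resplot=True)  # resplot draws data, fit and residuals
fit_res.gamma_method()
print(fit_res)

# Variants described above (priors and correlated_fit cannot be combined):
# least_squares(x, y, func, correlated_fit=True)
# least_squares(x, y, func, priors=['2.0(5)', '0.3(1)', '0.1(1)'])
```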
def total_least_squares(x, y, func, silent=False, **kwargs):
Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
Parameters
- x (list): list of Obs, or a tuple of lists of Obs
- y (list): list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
- func (object): func has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```

For multiple x values func can be of the form

```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```

It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.

- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
- expected_chisquare (bool): If true prints the expected chisquare which is corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
Notes
Based on the orthogonal distance regression module of scipy
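A usage sketch with errors on both axes (again assuming `pyerrors.pseudo_Obs` for toy observables):

```python
from pyerrors import pseudo_Obs
from pyerrors.fits import total_least_squares

# Toy data: both x and y carry uncertainties, so orthogonal distance
# regression is the appropriate procedure.
xvals = [1.0, 2.0, 3.0, 4.0, 5.0]
ox = [pseudo_Obs(xi, 0.02, 'ens1') for xi in xvals]
oy = [pseudo_Obs(0.5 + 2.0 * xi, 0.1, 'ens1') for xi in xvals]
for o in ox + oy:
    o.gamma_method()  # x and y errors are required

def func(a, x):
    return a[0] + a[1] * x

odr_res = total_least_squares(ox, oy, func)
odr_res.gamma_method()
print(odr_res)
```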
def fit_lin(x, y, **kwargs):
Performs a linear fit to y = n + m * x and returns two Obs n, m.
Parameters
- x (list): Can either be a list of floats in which case no xerror is assumed, or a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
- y (list): List of Obs, the dvalues of the Obs are used as yerror for the fit.
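A sketch of both call patterns (toy observables via `pyerrors.pseudo_Obs` assumed):

```python
from pyerrors import pseudo_Obs
from pyerrors.fits import fit_lin

xvals = [1.0, 2.0, 3.0, 4.0]
y = [pseudo_Obs(0.5 + 2.0 * xi, 0.1, 'ens1') for xi in xvals]
for o in y:
    o.gamma_method()

n, m = fit_lin(xvals, y)  # x as plain floats: only the y errors enter
n.gamma_method()
m.gamma_method()
print(n, m)

# With x given as Obs the fit is delegated to total_least_squares:
# ox = [pseudo_Obs(xi, 0.02, 'ens1') for xi in xvals]
# n, m = fit_lin(ox, y)
```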
def qqplot(x, o_y, func, p):
Generates a quantile-quantile plot of the fit result which can be used to check whether the residuals of the fit are Gaussian distributed.
def residual_plot(x, y, func, fit_res):
Generates a plot which compares the fit to the data and displays the corresponding residuals
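Both plot helpers are usually requested through the `resplot=True` and `qqplot=True` keyword arguments of the fitting routines; calling them directly is equivalent (sketch, with `x`, `y`, `func` and `fit_res` as in the `least_squares` example above):

```python
from pyerrors.fits import qqplot, residual_plot

residual_plot(x, y, func, fit_res.fit_parameters)
qqplot(x, y, func, fit_res.fit_parameters)
```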
def error_band(x, func, beta):
Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta.
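A sketch of how the band is typically used (with `func` and `fit_res` from the `least_squares` example above):

```python
import numpy as np
import matplotlib.pyplot as plt
from pyerrors.fits import error_band

beta = fit_res.fit_parameters  # list of Obs with errors already computed
x_band = np.linspace(1.0, 6.0, 200)
central = func(np.array([b.value for b in beta]), x_band)
err = error_band(x_band, func, beta)  # 1-sigma error of the fitted curve
plt.fill_between(x_band, central - err, central + err, alpha=0.3)
plt.show()
```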
def ks_test(objects=None):
Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.
Parameters
- objects (list): List of fit results to include in the analysis (optional).
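Usage sketch (assuming `fit_results` is a list of Fit_result objects collected from repeated fits):

```python
from pyerrors.fits import ks_test

ks_test(fit_results)  # compare the p-value distribution to the uniform expectation
# ks_test()           # without arguments every Fit_result tracked by the garbage collector is used
```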