pyerrors.fits
import gc
from collections.abc import Sequence
import warnings
import numpy as np
import autograd.numpy as anp
import scipy.optimize
import scipy.stats
import scipy.linalg  # solve and solve_triangular are used below
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy.odr import ODR, Model, RealData
import iminuit
from autograd import jacobian as auto_jacobian
from autograd import hessian as auto_hessian
from autograd import elementwise_grad as egrad
from numdifftools import Jacobian as num_jacobian
from numdifftools import Hessian as num_hessian
from .obs import Obs, derived_observable, covariance, cov_Obs


class Fit_result(Sequence):
    """Represents fit results.

    Attributes
    ----------
    fit_parameters : list
        results for the individual fit parameters,
        also accessible via indices.
    chisquare_by_dof : float
        reduced chisquare.
    p_value : float
        p-value of the fit
    t2_p_value : float
        Hotelling t-squared p-value for correlated fits.
    """

    def __init__(self):
        self.fit_parameters = None

    def __getitem__(self, idx):
        return self.fit_parameters[idx]

    def __len__(self):
        return len(self.fit_parameters)

    def gamma_method(self, **kwargs):
        """Apply the gamma method to all fit parameters"""
        [o.gamma_method(**kwargs) for o in self.fit_parameters]

    gm = gamma_method

    def __str__(self):
        my_str = 'Goodness of fit:\n'
        if hasattr(self, 'chisquare_by_dof'):
            my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n'
        elif hasattr(self, 'residual_variance'):
            my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n'
        if hasattr(self, 'chisquare_by_expected_chisquare'):
            my_str += '\u03C7\u00b2/\u03C7\u00b2exp = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n'
        if hasattr(self, 'p_value'):
            my_str += 'p-value = ' + f'{self.p_value:2.4f}' + '\n'
        if hasattr(self, 't2_p_value'):
            my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n'
        my_str += 'Fit parameters:\n'
        for i_par, par in enumerate(self.fit_parameters):
            my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n'
        return my_str

    def __repr__(self):
        m = max(map(len, list(self.__dict__.keys()))) + 1
        return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())])


def least_squares(x, y, func, priors=None, silent=False, **kwargs):
    r'''Performs a non-linear fit to y = func(x).

    Parameters
    ----------
    x : list
        list of floats.
    y : list
        list of Obs.
    func : object
        fit function, has to be of the form

        ```python
        import autograd.numpy as anp

        def func(a, x):
            return a[0] + a[1] * x + a[2] * anp.sinh(x)
        ```

        For multiple x values func can be of the form

        ```python
        def func(a, x):
            (x1, x2) = x
            return a[0] * x1 ** 2 + a[1] * x2
        ```

        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
        will not work.
    priors : list, optional
        priors has to be a list with an entry for every parameter in the fit. The entries can either be
        Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
        0.548(23), 500(40) or 0.5(0.4)
    silent : bool, optional
        If true all output to the console is omitted (default False).
    initial_guess : list
        can provide an initial guess for the input parameters. Relevant for
        non-linear fits with many parameters. In case of correlated fits the guess is used to perform
        an uncorrelated fit which then serves as guess for the correlated fit.
    method : str, optional
        can be used to choose an alternative method for the minimization of chisquare.
        The possible methods are the ones which can be used for scipy.optimize.minimize and
        migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
        Reliable alternatives are migrad, Powell and Nelder-Mead.
    correlated_fit : bool
        If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
        At the moment this option only works for `prior==None` and when no `method` is given.
    expected_chisquare : bool
        If True estimates the expected chisquare which is
        corrected by effects caused by correlated input data (default False).
    resplot : bool
        If True, a plot which displays fit, data and residuals is generated (default False).
    qqplot : bool
        If True, a quantile-quantile plot of the fit result is generated (default False).
    num_grad : bool
        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).

    Returns
    -------
    output : Fit_result
        Parameters and information on the fitted result.
    '''
    if priors is not None:
        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
    else:
        return _standard_fit(x, y, func, silent=silent, **kwargs)


def total_least_squares(x, y, func, silent=False, **kwargs):
    r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.

    Parameters
    ----------
    x : list
        list of Obs, or a tuple of lists of Obs
    y : list
        list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
    func : object
        func has to be of the form

        ```python
        import autograd.numpy as anp

        def func(a, x):
            return a[0] + a[1] * x + a[2] * anp.sinh(x)
        ```

        For multiple x values func can be of the form

        ```python
        def func(a, x):
            (x1, x2) = x
            return a[0] * x1 ** 2 + a[1] * x2
        ```

        It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
        will not work.
    silent : bool, optional
        If true all output to the console is omitted (default False).
    initial_guess : list
        can provide an initial guess for the input parameters. Relevant for non-linear
        fits with many parameters.
    expected_chisquare : bool
        If true prints the expected chisquare which is
        corrected by effects caused by correlated input data.
        This can take a while as the full correlation matrix
        has to be calculated (default False).
    num_grad : bool
        Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).

    Notes
    -----
    Based on the orthogonal distance regression module of scipy.

    Returns
    -------
    output : Fit_result
        Parameters and information on the fitted result.
    '''

    output = Fit_result()

    output.fit_function = func

    x = np.array(x)

    x_shape = x.shape

    if kwargs.get('num_grad') is True:
        jacobian = num_jacobian
        hessian = num_hessian
    else:
        jacobian = auto_jacobian
        hessian = auto_hessian

    if not callable(func):
        raise TypeError('func has to be a function.')

    for i in range(42):
        try:
            func(np.arange(i), x.T[0])
        except TypeError:
            continue
        except IndexError:
            continue
        else:
            break
    else:
        raise RuntimeError("Fit function is not valid.")

    n_parms = i
    if not silent:
        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))

    x_f = np.vectorize(lambda o: o.value)(x)
    dx_f = np.vectorize(lambda o: o.dvalue)(x)
    y_f = np.array([o.value for o in y])
    dy_f = np.array([o.dvalue for o in y])

    if np.any(np.asarray(dx_f) <= 0.0):
        raise Exception('No x errors available, run the gamma method first.')

    if np.any(np.asarray(dy_f) <= 0.0):
        raise Exception('No y errors available, run the gamma method first.')

    if 'initial_guess' in kwargs:
        x0 = kwargs.get('initial_guess')
        if len(x0) != n_parms:
            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
    else:
        x0 = [1] * n_parms

    data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
    model = Model(func)
    odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps)
    odr.set_job(fit_type=0, deriv=1)
    out = odr.run()

    output.residual_variance = out.res_var

    output.method = 'ODR'

    output.message = out.stopreason

    output.xplus = out.xplus

    if not silent:
        print('Method: ODR')
        print(*out.stopreason)
        print('Residual variance:', output.residual_variance)

    if out.info > 3:
        raise Exception('The minimization procedure did not converge.')

    m = x_f.size

    def odr_chisquare(p):
        model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
        return chisq

    if kwargs.get('expected_chisquare') is True:
        W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))

        if kwargs.get('covariance') is not None:
            cov = kwargs.get('covariance')
        else:
            cov = covariance(np.concatenate((y, x.ravel())))

        number_of_x_parameters = int(m / x_f.shape[-1])

        old_jac = jacobian(func)(out.beta, out.xplus)
        fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
        fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
        new_jac = np.concatenate((fused_row1, fused_row2), axis=1)

        A = W @ new_jac
        P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
        expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
        if expected_chisquare <= 0.0:
            warnings.warn("Negative expected_chisquare.", RuntimeWarning)
            expected_chisquare = np.abs(expected_chisquare)
        output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare
        if not silent:
            print('chisquare/expected_chisquare:',
                  output.chisquare_by_expected_chisquare)

    fitp = out.beta
    try:
        hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel())))
    except TypeError:
        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None

    def odr_chisquare_compact_x(d):
        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
        return chisq

    jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel())))

    # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
    try:
        deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:])
    except np.linalg.LinAlgError:
        raise Exception("Cannot invert hessian matrix.")

    def odr_chisquare_compact_y(d):
        model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
        chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
        return chisq

    jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f)))

    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
    try:
        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:])
    except np.linalg.LinAlgError:
        raise Exception("Cannot invert hessian matrix.")

    result = []
    for i in range(n_parms):
        result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i])))

    output.fit_parameters = result

    output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel())))
    output.dof = x.shape[-1] - n_parms
    output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)

    return output


def _prior_fit(x, y, func, priors, silent=False, **kwargs):
    output = Fit_result()

    output.fit_function = func

    x = np.asarray(x)

    if kwargs.get('num_grad') is True:
        hessian = num_hessian
    else:
        hessian = auto_hessian

    if not callable(func):
        raise TypeError('func has to be a function.')

    for i in range(100):
        try:
            func(np.arange(i), 0)
        except TypeError:
            continue
        except IndexError:
            continue
        else:
            break
    else:
        raise RuntimeError("Fit function is not valid.")

    n_parms = i

    if n_parms != len(priors):
        raise Exception('Priors does not have the correct length.')

    def extract_val_and_dval(string):
        split_string = string.split('(')
        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
            factor = 10 ** -len(split_string[0].partition('.')[2])
        else:
            factor = 1
        return float(split_string[0]), float(split_string[1][:-1]) * factor

    loc_priors = []
    for i_n, i_prior in enumerate(priors):
        if isinstance(i_prior, Obs):
            loc_priors.append(i_prior)
        else:
            loc_val, loc_dval = extract_val_and_dval(i_prior)
            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))

    output.priors = loc_priors

    if not silent:
        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))

    y_f = [o.value for o in y]
    dy_f = [o.dvalue for o in y]

    if np.any(np.asarray(dy_f) <= 0.0):
        raise Exception('No y errors available, run the gamma method first.')

    p_f = [o.value for o in loc_priors]
    dp_f = [o.dvalue for o in loc_priors]

    if np.any(np.asarray(dp_f) <= 0.0):
        raise Exception('No prior errors available, run the gamma method first.')

    if 'initial_guess' in kwargs:
        x0 = kwargs.get('initial_guess')
        if len(x0) != n_parms:
            raise Exception('Initial guess does not have the correct length.')
    else:
        x0 = p_f

    def chisqfunc(p):
        model = func(p, x)
        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
        return chisq

    if not silent:
        print('Method: migrad')

    m = iminuit.Minuit(chisqfunc, x0)
    m.errordef = 1
    m.print_level = 0
    if 'tol' in kwargs:
        m.tol = kwargs.get('tol')
    else:
        m.tol = 1e-4
    m.migrad()
    params = np.asarray(m.values)

    output.chisquare_by_dof = m.fval / len(x)

    output.method = 'migrad'

    if not silent:
        print('chisquare/d.o.f.:', output.chisquare_by_dof)

    if not m.fmin.is_valid:
        raise Exception('The minimization procedure did not converge.')

    hess = hessian(chisqfunc)(params)
    hess_inv = np.linalg.pinv(hess)

    def chisqfunc_compact(d):
        model = func(d[:n_parms], x)
        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
        return chisq

    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))

    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]

    result = []
    for i in range(n_parms):
        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))

    output.fit_parameters = result
    output.chisquare = chisqfunc(np.asarray(params))

    if kwargs.get('resplot') is True:
        residual_plot(x, y, func, result)

    if kwargs.get('qqplot') is True:
        qqplot(x, y, func, result)

    return output


def _standard_fit(x, y, func, silent=False, **kwargs):

    output = Fit_result()

    output.fit_function = func

    x = np.asarray(x)

    if kwargs.get('num_grad') is True:
        jacobian = num_jacobian
        hessian = num_hessian
    else:
        jacobian = auto_jacobian
        hessian = auto_hessian

    if x.shape[-1] != len(y):
        raise Exception('x and y input have to have the same length')

    if len(x.shape) > 2:
        raise Exception('Unknown format for x values')

    if not callable(func):
        raise TypeError('func has to be a function.')

    for i in range(42):
        try:
            func(np.arange(i), x.T[0])
        except TypeError:
            continue
        except IndexError:
            continue
        else:
            break
    else:
        raise RuntimeError("Fit function is not valid.")

    n_parms = i

    if not silent:
        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))

    y_f = [o.value for o in y]
    dy_f = [o.dvalue for o in y]

    if np.any(np.asarray(dy_f) <= 0.0):
        raise Exception('No y errors available, run the gamma method first.')

    if 'initial_guess' in kwargs:
        x0 = kwargs.get('initial_guess')
        if len(x0) != n_parms:
            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
    else:
        x0 = [0.1] * n_parms

    if kwargs.get('correlated_fit') is True:
        corr = covariance(y, correlation=True, **kwargs)
        covdiag = np.diag(1 / np.asarray(dy_f))
        condn = np.linalg.cond(corr)
        if condn > 0.1 / np.finfo(float).eps:
            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
        if condn > 1e13:
            warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
        chol = np.linalg.cholesky(corr)
        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)

        def chisqfunc_corr(p):
            model = func(p, x)
            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
            return chisq

    def chisqfunc(p):
        model = func(p, x)
        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
        return chisq

    output.method = kwargs.get('method', 'Levenberg-Marquardt')
    if not silent:
        print('Method:', output.method)

    if output.method != 'Levenberg-Marquardt':
        if output.method == 'migrad':
            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
            if kwargs.get('correlated_fit') is True:
                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
            output.iterations = fit_result.nfev
        else:
            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
            if kwargs.get('correlated_fit') is True:
                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
            output.iterations = fit_result.nit

        chisquare = fit_result.fun

    else:
        if kwargs.get('correlated_fit') is True:
            def chisqfunc_residuals_corr(p):
                model = func(p, x)
                chisq = anp.dot(chol_inv, (y_f - model))
                return chisq

        def chisqfunc_residuals(p):
            model = func(p, x)
            chisq = ((y_f - model) / dy_f)
            return chisq

        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
        if kwargs.get('correlated_fit') is True:
            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)

        chisquare = np.sum(fit_result.fun ** 2)
        if kwargs.get('correlated_fit') is True:
            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
        else:
            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)

        output.iterations = fit_result.nfev

    if not fit_result.success:
        raise Exception('The minimization procedure did not converge.')

    if x.shape[-1] - n_parms > 0:
        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
    else:
        output.chisquare_by_dof = float('nan')

    output.message = fit_result.message
    if not silent:
        print(fit_result.message)
        print('chisquare/d.o.f.:', output.chisquare_by_dof)

    if kwargs.get('expected_chisquare') is True:
        if kwargs.get('correlated_fit') is not True:
            W = np.diag(1 / np.asarray(dy_f))
            cov = covariance(y)
            A = W @ jacobian(func)(fit_result.x, x)
            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
            if not silent:
                print('chisquare/expected_chisquare:',
                      output.chisquare_by_expected_chisquare)

    fitp = fit_result.x
    try:
        if kwargs.get('correlated_fit') is True:
            hess = hessian(chisqfunc_corr)(fitp)
        else:
            hess = hessian(chisqfunc)(fitp)
    except TypeError:
        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None

    if kwargs.get('correlated_fit') is True:
        def chisqfunc_compact(d):
            model = func(d[:n_parms], x)
            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
            return chisq

    else:
        def chisqfunc_compact(d):
            model = func(d[:n_parms], x)
            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
            return chisq

    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))

    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
    try:
        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
    except np.linalg.LinAlgError:
        raise Exception("Cannot invert hessian matrix.")

    result = []
    for i in range(n_parms):
        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))

    output.fit_parameters = result

    output.chisquare = chisquare
    output.dof = x.shape[-1] - n_parms
    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
    # Hotelling t-squared p-value for correlated fits.
    if kwargs.get('correlated_fit') is True:
        n_cov = np.min(np.vectorize(lambda x: x.N)(y))
        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
                                                  output.dof, n_cov - output.dof)

    if kwargs.get('resplot') is True:
        residual_plot(x, y, func, result)

    if kwargs.get('qqplot') is True:
        qqplot(x, y, func, result)

    return output


def fit_lin(x, y, **kwargs):
    """Performs a linear fit to y = n + m * x and returns two Obs n, m.

    Parameters
    ----------
    x : list
        Can either be a list of floats in which case no xerror is assumed, or
        a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
    y : list
        List of Obs, the dvalues of the Obs are used as yerror for the fit.

    Returns
    -------
    fit_parameters : list[Obs]
        List of fitted observables.
    """

    def f(a, x):
        y = a[0] + a[1] * x
        return y

    if all(isinstance(n, Obs) for n in x):
        out = total_least_squares(x, y, f, **kwargs)
        return out.fit_parameters
    elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
        out = least_squares(x, y, f, **kwargs)
        return out.fit_parameters
    else:
        raise Exception('Unsupported types for x')


def qqplot(x, o_y, func, p):
    """Generates a quantile-quantile plot of the fit result which can be used to
       check if the residuals of the fit are Gaussian distributed.

    Returns
    -------
    None
    """

    residuals = []
    for i_x, i_y in zip(x, o_y):
        residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
    residuals = sorted(residuals)
    my_y = [o.value for o in residuals]
    probplot = scipy.stats.probplot(my_y)
    my_x = probplot[0][0]
    plt.figure(figsize=(8, 8 / 1.618))
    plt.errorbar(my_x, my_y, fmt='o')
    fit_start = my_x[0]
    fit_stop = my_x[-1]
    samples = np.arange(fit_start, fit_stop, 0.01)
    plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
    plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-')

    plt.xlabel('Theoretical quantiles')
    plt.ylabel('Ordered Values')
    plt.legend()
    plt.draw()


def residual_plot(x, y, func, fit_res):
    """Generates a plot which compares the fit to the data and displays the corresponding residuals

    Returns
    -------
    None
    """
    sorted_x = sorted(x)
    xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
    xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
    x_samples = np.arange(xstart, xstop + 0.01, 0.01)

    plt.figure(figsize=(8, 8 / 1.618))
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
    ax0 = plt.subplot(gs[0])
    ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
    ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0)
    ax0.set_xticklabels([])
    ax0.set_xlim([xstart, xstop])
    ax0.set_xticklabels([])
    ax0.legend()

    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
    ax1 = plt.subplot(gs[1])
    ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
    ax1.tick_params(direction='out')
    ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
    ax1.axhline(y=0.0, ls='--', color='k', marker=" ")
    ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
    ax1.set_xlim([xstart, xstop])
    ax1.set_ylabel('Residuals')
    plt.subplots_adjust(wspace=None, hspace=None)
    plt.draw()


def error_band(x, func, beta):
    """Calculate the error band for an array of sample values x, for given fit function func with optimized parameters beta.

    Returns
    -------
    err : np.array(Obs)
        Error band for an array of sample values x
    """
    cov = covariance(beta)
    if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps):
        warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning)

    deriv = []
    for i, item in enumerate(x):
        deriv.append(np.array(egrad(func)([o.value for o in beta], item)))

    err = []
    for i, item in enumerate(x):
        err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
    err = np.array(err)

    return err


def ks_test(objects=None):
    """Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.

    Parameters
    ----------
    objects : list
        List of fit results to include in the analysis (optional).

    Returns
    -------
    None
    """

    if objects is None:
        obs_list = []
        for obj in gc.get_objects():
            if isinstance(obj, Fit_result):
                obs_list.append(obj)
    else:
        obs_list = objects

    p_values = [o.p_value for o in obs_list]

    bins = len(p_values)
    x = np.arange(0, 1.001, 0.001)
    plt.plot(x, x, 'k', zorder=1)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.xlabel('p-value')
    plt.ylabel('Cumulative probability')
    plt.title(str(bins) + ' p-values')

    n = np.arange(1, bins + 1) / np.float64(bins)
    Xs = np.sort(p_values)
    plt.step(Xs, n)
    diffs = n - Xs
    loc_max_diff = np.argmax(np.abs(diffs))
    loc = Xs[loc_max_diff]
    plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
    plt.draw()

    print(scipy.stats.kstest(p_values, 'uniform'))
class Fit_result(Sequence):
Represents fit results.
Attributes
- fit_parameters (list): results for the individual fit parameters, also accessible via indices.
- chisquare_by_dof (float): reduced chisquare.
- p_value (float): p-value of the fit
- t2_p_value (float): Hotelling t-squared p-value for correlated fits.
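A minimal sketch of how a Fit_result is typically used after a fit. The helper pe.pseudo_Obs and the ensemble name are assumptions made only to build test observables; everything else uses the attributes and methods documented here.

```python
import numpy as np
import pyerrors as pe

# Build a few test observables on a linear trend; pe.pseudo_Obs(value, dvalue, name)
# is assumed to create an Obs with the given error (it is not part of this module).
x = np.arange(5)
y = [pe.pseudo_Obs(0.5 + 0.3 * xi, 0.05, 'test_ens') for xi in x]
[o.gamma_method() for o in y]  # errors have to be estimated before fitting

res = pe.fits.least_squares(x, y, lambda a, x: a[0] + a[1] * x, silent=True)

res.gamma_method()                 # or res.gm(); error propagation for all parameters
print(res)                         # goodness-of-fit summary and fit parameters
intercept, slope = res[0], res[1]  # parameters are accessible via indices
print(res.chisquare_by_dof, res.p_value)
```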
def gamma_method(self, **kwargs):
Apply the gamma method to all fit parameters
gm = gamma_method
Apply the gamma method to all fit parameters
Inherited Members
- collections.abc.Sequence
- index
- count
def least_squares(x, y, func, priors=None, silent=False, **kwargs):
Performs a non-linear fit to y = func(x).
Parameters
- x (list): list of floats.
- y (list): list of Obs.
- func (object): fit function, has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```

For multiple x values func can be of the form

```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```

It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.
- priors (list, optional): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4)
- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters. In case of correlated fits the guess is used to perform an uncorrelated fit which then serves as guess for the correlated fit.
- method (str, optional): can be used to choose an alternative method for the minimization of chisquare. The possible methods are the ones which can be used for scipy.optimize.minimize and migrad of iminuit. If no method is specified, Levenberg-Marquardt is used. Reliable alternatives are migrad, Powell and Nelder-Mead.
- correlated_fit (bool): If True, use the full inverse covariance matrix in the definition of the chisquare cost function. For details about how the covariance matrix is estimated see pyerrors.obs.covariance. In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). At the moment this option only works for prior==None and when no method is given.
- expected_chisquare (bool): If True estimates the expected chisquare which is corrected by effects caused by correlated input data (default False).
- resplot (bool): If True, a plot which displays fit, data and residuals is generated (default False).
- qqplot (bool): If True, a quantile-quantile plot of the fit result is generated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
Returns
- output (Fit_result): Parameters and information on the fitted result.
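A sketch of a typical call. It assumes that pe.pseudo_Obs(value, dvalue, name) is available elsewhere in pyerrors for generating test observables; the fit function itself follows the form required above.

```python
import numpy as np
import autograd.numpy as anp  # the fit function must use autograd.numpy
import pyerrors as pe

def func(a, x):
    return a[0] * anp.exp(-a[1] * x)

x = np.arange(1, 8)
y = [pe.pseudo_Obs(2.0 * np.exp(-0.3 * xi), 0.02, 'test_ens') for xi in x]  # assumed helper
[o.gamma_method() for o in y]  # y errors are required by the fit

res = pe.fits.least_squares(x, y, func, initial_guess=[1.5, 0.2])
res.gamma_method()
print(res)

# Correlated fit using the full covariance matrix of y (default Levenberg-Marquardt only)
res_corr = pe.fits.least_squares(x, y, func, correlated_fit=True)
res_corr.gamma_method()
print(res_corr.t2_p_value)
```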
def total_least_squares(x, y, func, silent=False, **kwargs):
Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
Parameters
- x (list): list of Obs, or a tuple of lists of Obs
- y (list): list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
- func (object): func has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```

For multiple x values func can be of the form

```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```

It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.
- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
- expected_chisquare (bool): If true prints the expected chisquare which is corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
Notes
Based on the orthogonal distance regression module of scipy.
Returns
- output (Fit_result): Parameters and information on the fitted result.
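A sketch with errors on both x and y, again using the assumed pe.pseudo_Obs helper for test data:

```python
import numpy as np
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

# x and y both carry errors, so the orthogonal distance regression of scipy is used
ox = [pe.pseudo_Obs(xi, 0.01, 'test_ens') for xi in np.arange(1.0, 6.0)]
oy = [pe.pseudo_Obs(0.3 + 0.7 * xi, 0.02, 'test_ens') for xi in np.arange(1.0, 6.0)]
[o.gamma_method() for o in ox + oy]  # x and y errors have to be estimated first

res = pe.fits.total_least_squares(ox, oy, func)
res.gamma_method()
print(res)  # reports the residual variance instead of chisquare/d.o.f.
```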
def fit_lin(x, y, **kwargs):
Performs a linear fit to y = n + m * x and returns two Obs n, m.
Parameters
- x (list): Can either be a list of floats in which case no xerror is assumed, or a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
- y (list): List of Obs, the dvalues of the Obs are used as yerror for the fit.
Returns
- fit_parameters (list[Obs]): List of fitted observables.
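A sketch for the plain-float-x case (pe.pseudo_Obs is again an assumed helper for test data); with a list of Obs as x the same call dispatches to total_least_squares instead.

```python
import pyerrors as pe

x = [1.0, 2.0, 3.0, 4.0]
y = [pe.pseudo_Obs(0.5 + 0.25 * xi, 0.01, 'test_ens') for xi in x]  # assumed helper
[o.gamma_method() for o in y]

n, m = pe.fits.fit_lin(x, y)  # intercept n and slope m as Obs
n.gamma_method()
m.gamma_method()
print(n, m)
```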
def qqplot(x, o_y, func, p):
Generates a quantile-quantile plot of the fit result which can be used to check if the residuals of the fit are Gaussian distributed.
Returns
- None
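The same plot is produced automatically by passing qqplot=True to least_squares; a direct call could look like this sketch (pe.pseudo_Obs is an assumed helper for test data):

```python
import numpy as np
import matplotlib.pyplot as plt
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

x = np.arange(1, 8)
y = [pe.pseudo_Obs(0.2 + 0.5 * xi, 0.03, 'test_ens') for xi in x]  # assumed helper
[o.gamma_method() for o in y]

res = pe.fits.least_squares(x, y, func, silent=True)

pe.fits.qqplot(x, y, func, res.fit_parameters)  # p is the list of fitted Obs
plt.show()
```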
def residual_plot(x, y, func, fit_res):
Generates a plot which compares the fit to the data and displays the corresponding residuals
Returns
- None
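residual_plot is what least_squares calls when resplot=True is passed; invoked by hand it looks like this sketch (test data again built with the assumed pe.pseudo_Obs helper):

```python
import numpy as np
import matplotlib.pyplot as plt
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

x = np.arange(1, 8)
y = [pe.pseudo_Obs(1.0 + 0.4 * xi, 0.05, 'test_ens') for xi in x]  # assumed helper
[o.gamma_method() for o in y]

res = pe.fits.least_squares(x, y, func, silent=True)

pe.fits.residual_plot(x, y, func, res.fit_parameters)  # fit, data and residuals
plt.show()
```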
def error_band(x, func, beta):
Calculate the error band for an array of sample values x, for given fit function func with optimized parameters beta.
Returns
- err (np.array(Obs)): Error band for an array of sample values x
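A sketch of drawing a one-sigma band around a fitted curve. It assumes pe.pseudo_Obs for the test data and requires gamma_method to have been run on the fit parameters so that their covariance is available:

```python
import numpy as np
import matplotlib.pyplot as plt
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

x = np.arange(1, 8)
y = [pe.pseudo_Obs(0.8 + 0.2 * xi, 0.02, 'test_ens') for xi in x]  # assumed helper
[o.gamma_method() for o in y]

res = pe.fits.least_squares(x, y, func, silent=True)
res.gamma_method()  # needed so that covariance(beta) can be computed

x_samples = np.linspace(1, 7, 100)
band = pe.fits.error_band(x_samples, func, res.fit_parameters)
central = func([p.value for p in res.fit_parameters], x_samples)

plt.fill_between(x_samples, central - band, central + band, alpha=0.3)
plt.plot(x_samples, central)
plt.show()
```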
def ks_test(objects=None):
Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.
Parameters
- objects (list): List of fit results to include in the analysis (optional).
Returns
- None
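A sketch that collects p-values from several independent synthetic fits and feeds them to ks_test; pe.pseudo_Obs and the ensemble names are assumptions used only to generate test data:

```python
import numpy as np
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

x = np.arange(1, 6)
fit_results = []
for k in range(20):
    # 20 independent synthetic data sets scattered around the same linear model
    y = [pe.pseudo_Obs(0.1 + 0.3 * xi + np.random.normal(0.0, 0.05), 0.05, 'test_ens_' + str(k)) for xi in x]
    [o.gamma_method() for o in y]
    fit_results.append(pe.fits.least_squares(x, y, func, silent=True))

pe.fits.ks_test(fit_results)  # compares the empirical CDF of the p-values to uniform
```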