pyerrors.fits
1import gc 2from collections.abc import Sequence 3import warnings 4import numpy as np 5import autograd.numpy as anp 6import scipy.optimize 7import scipy.stats 8import matplotlib.pyplot as plt 9from matplotlib import gridspec 10from scipy.odr import ODR, Model, RealData 11import iminuit 12from autograd import jacobian as auto_jacobian 13from autograd import hessian as auto_hessian 14from autograd import elementwise_grad as egrad 15from numdifftools import Jacobian as num_jacobian 16from numdifftools import Hessian as num_hessian 17from .obs import Obs, derived_observable, covariance, cov_Obs 18 19 20class Fit_result(Sequence): 21 """Represents fit results. 22 23 Attributes 24 ---------- 25 fit_parameters : list 26 results for the individual fit parameters, 27 also accessible via indices. 28 chisquare_by_dof : float 29 reduced chisquare. 30 p_value : float 31 p-value of the fit 32 t2_p_value : float 33 Hotelling t-squared p-value for correlated fits. 34 """ 35 36 def __init__(self): 37 self.fit_parameters = None 38 39 def __getitem__(self, idx): 40 return self.fit_parameters[idx] 41 42 def __len__(self): 43 return len(self.fit_parameters) 44 45 def gamma_method(self, **kwargs): 46 """Apply the gamma method to all fit parameters""" 47 [o.gamma_method(**kwargs) for o in self.fit_parameters] 48 49 gm = gamma_method 50 51 def __str__(self): 52 my_str = 'Goodness of fit:\n' 53 if hasattr(self, 'chisquare_by_dof'): 54 my_str += '\u03C7\u00b2/d.o.f. = ' + f'{self.chisquare_by_dof:2.6f}' + '\n' 55 elif hasattr(self, 'residual_variance'): 56 my_str += 'residual variance = ' + f'{self.residual_variance:2.6f}' + '\n' 57 if hasattr(self, 'chisquare_by_expected_chisquare'): 58 my_str += '\u03C7\u00b2/\u03C7\u00b2exp = ' + f'{self.chisquare_by_expected_chisquare:2.6f}' + '\n' 59 if hasattr(self, 'p_value'): 60 my_str += 'p-value = ' + f'{self.p_value:2.4f}' + '\n' 61 if hasattr(self, 't2_p_value'): 62 my_str += 't\u00B2p-value = ' + f'{self.t2_p_value:2.4f}' + '\n' 63 my_str += 'Fit parameters:\n' 64 for i_par, par in enumerate(self.fit_parameters): 65 my_str += str(i_par) + '\t' + ' ' * int(par >= 0) + str(par).rjust(int(par < 0.0)) + '\n' 66 return my_str 67 68 def __repr__(self): 69 m = max(map(len, list(self.__dict__.keys()))) + 1 70 return '\n'.join([key.rjust(m) + ': ' + repr(value) for key, value in sorted(self.__dict__.items())]) 71 72 73def least_squares(x, y, func, priors=None, silent=False, **kwargs): 74 r'''Performs a non-linear fit to y = func(x). 75 ``` 76 77 Parameters 78 ---------- 79 For an uncombined fit: 80 81 x : list 82 list of floats. 83 y : list 84 list of Obs. 85 func : object 86 fit function, has to be of the form 87 88 ```python 89 import autograd.numpy as anp 90 91 def func(a, x): 92 return a[0] + a[1] * x + a[2] * anp.sinh(x) 93 ``` 94 95 For multiple x values func can be of the form 96 97 ```python 98 def func(a, x): 99 (x1, x2) = x 100 return a[0] * x1 ** 2 + a[1] * x2 101 ``` 102 It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation 103 will not work. 104 105 OR For a combined fit: 106 107 x : dict 108 dict of lists. 109 y : dict 110 dict of lists of Obs. 
111 funcs : dict 112 dict of objects 113 fit functions have to be of the form (here a[0] is the common fit parameter) 114 ```python 115 import autograd.numpy as anp 116 funcs = {"a": func_a, 117 "b": func_b} 118 119 def func_a(a, x): 120 return a[1] * anp.exp(-a[0] * x) 121 122 def func_b(a, x): 123 return a[2] * anp.exp(-a[0] * x) 124 125 It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation 126 will not work. 127 128 priors : list, optional 129 priors has to be a list with an entry for every parameter in the fit. The entries can either be 130 Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 131 0.548(23), 500(40) or 0.5(0.4) 132 silent : bool, optional 133 If true all output to the console is omitted (default False). 134 initial_guess : list 135 can provide an initial guess for the input parameters. Relevant for 136 non-linear fits with many parameters. In case of correlated fits the guess is used to perform 137 an uncorrelated fit which then serves as guess for the correlated fit. 138 method : str, optional 139 can be used to choose an alternative method for the minimization of chisquare. 140 The possible methods are the ones which can be used for scipy.optimize.minimize and 141 migrad of iminuit. If no method is specified, Levenberg-Marquard is used. 142 Reliable alternatives are migrad, Powell and Nelder-Mead. 143 tol: float, optional 144 can be used (only for combined fits and methods other than Levenberg-Marquard) to set the tolerance for convergence 145 to a different value to either speed up convergence at the cost of a larger error on the fitted parameters (and possibly 146 invalid estimates for parameter uncertainties) or smaller values to get more accurate parameter values 147 The stopping criterion depends on the method, e.g. migrad: edm_max = 0.002 * tol * errordef (EDM criterion: edm < edm_max) 148 correlated_fit : bool 149 If True, use the full inverse covariance matrix in the definition of the chisquare cost function. 150 For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`. 151 In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). 152 This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). 153 At the moment this option only works for `prior==None` and when no `method` is given. 154 expected_chisquare : bool 155 If True estimates the expected chisquare which is 156 corrected by effects caused by correlated input data (default False). 157 resplot : bool 158 If True, a plot which displays fit, data and residuals is generated (default False). 159 qqplot : bool 160 If True, a quantile-quantile plot of the fit result is generated (default False). 161 num_grad : bool 162 Use numerical differentation instead of automatic differentiation to perform the error propagation (default False). 163 164 Returns 165 ------- 166 output : Fit_result 167 Parameters and information on the fitted result. 168 ''' 169 if priors is not None: 170 return _prior_fit(x, y, func, priors, silent=silent, **kwargs) 171 else: 172 return _combined_fit(x, y, func, silent=silent, **kwargs) 173 174 175def total_least_squares(x, y, func, silent=False, **kwargs): 176 r'''Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters. 
177 178 Parameters 179 ---------- 180 x : list 181 list of Obs, or a tuple of lists of Obs 182 y : list 183 list of Obs. The dvalues of the Obs are used as x- and yerror for the fit. 184 func : object 185 func has to be of the form 186 187 ```python 188 import autograd.numpy as anp 189 190 def func(a, x): 191 return a[0] + a[1] * x + a[2] * anp.sinh(x) 192 ``` 193 194 For multiple x values func can be of the form 195 196 ```python 197 def func(a, x): 198 (x1, x2) = x 199 return a[0] * x1 ** 2 + a[1] * x2 200 ``` 201 202 It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation 203 will not work. 204 silent : bool, optional 205 If true all output to the console is omitted (default False). 206 initial_guess : list 207 can provide an initial guess for the input parameters. Relevant for non-linear 208 fits with many parameters. 209 expected_chisquare : bool 210 If true prints the expected chisquare which is 211 corrected by effects caused by correlated input data. 212 This can take a while as the full correlation matrix 213 has to be calculated (default False). 214 num_grad : bool 215 Use numerical differentation instead of automatic differentiation to perform the error propagation (default False). 216 217 Notes 218 ----- 219 Based on the orthogonal distance regression module of scipy. 220 221 Returns 222 ------- 223 output : Fit_result 224 Parameters and information on the fitted result. 225 ''' 226 227 output = Fit_result() 228 229 output.fit_function = func 230 231 x = np.array(x) 232 233 x_shape = x.shape 234 235 if kwargs.get('num_grad') is True: 236 jacobian = num_jacobian 237 hessian = num_hessian 238 else: 239 jacobian = auto_jacobian 240 hessian = auto_hessian 241 242 if not callable(func): 243 raise TypeError('func has to be a function.') 244 245 for i in range(42): 246 try: 247 func(np.arange(i), x.T[0]) 248 except TypeError: 249 continue 250 except IndexError: 251 continue 252 else: 253 break 254 else: 255 raise RuntimeError("Fit function is not valid.") 256 257 n_parms = i 258 if not silent: 259 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 260 261 x_f = np.vectorize(lambda o: o.value)(x) 262 dx_f = np.vectorize(lambda o: o.dvalue)(x) 263 y_f = np.array([o.value for o in y]) 264 dy_f = np.array([o.dvalue for o in y]) 265 266 if np.any(np.asarray(dx_f) <= 0.0): 267 raise Exception('No x errors available, run the gamma method first.') 268 269 if np.any(np.asarray(dy_f) <= 0.0): 270 raise Exception('No y errors available, run the gamma method first.') 271 272 if 'initial_guess' in kwargs: 273 x0 = kwargs.get('initial_guess') 274 if len(x0) != n_parms: 275 raise Exception('Initial guess does not have the correct length: %d vs. 
%d' % (len(x0), n_parms)) 276 else: 277 x0 = [1] * n_parms 278 279 data = RealData(x_f, y_f, sx=dx_f, sy=dy_f) 280 model = Model(func) 281 odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps) 282 odr.set_job(fit_type=0, deriv=1) 283 out = odr.run() 284 285 output.residual_variance = out.res_var 286 287 output.method = 'ODR' 288 289 output.message = out.stopreason 290 291 output.xplus = out.xplus 292 293 if not silent: 294 print('Method: ODR') 295 print(*out.stopreason) 296 print('Residual variance:', output.residual_variance) 297 298 if out.info > 3: 299 raise Exception('The minimization procedure did not converge.') 300 301 m = x_f.size 302 303 def odr_chisquare(p): 304 model = func(p[:n_parms], p[n_parms:].reshape(x_shape)) 305 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2) 306 return chisq 307 308 if kwargs.get('expected_chisquare') is True: 309 W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel())))) 310 311 if kwargs.get('covariance') is not None: 312 cov = kwargs.get('covariance') 313 else: 314 cov = covariance(np.concatenate((y, x.ravel()))) 315 316 number_of_x_parameters = int(m / x_f.shape[-1]) 317 318 old_jac = jacobian(func)(out.beta, out.xplus) 319 fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0))) 320 fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0]))) 321 new_jac = np.concatenate((fused_row1, fused_row2), axis=1) 322 323 A = W @ new_jac 324 P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T 325 expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W) 326 if expected_chisquare <= 0.0: 327 warnings.warn("Negative expected_chisquare.", RuntimeWarning) 328 expected_chisquare = np.abs(expected_chisquare) 329 output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare 330 if not silent: 331 print('chisquare/expected_chisquare:', 332 output.chisquare_by_expected_chisquare) 333 334 fitp = out.beta 335 try: 336 hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel()))) 337 except TypeError: 338 raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None 339 340 def odr_chisquare_compact_x(d): 341 model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) 342 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) 343 return chisq 344 345 jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel()))) 346 347 # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv 348 try: 349 deriv_x = -scipy.linalg.solve(hess, jac_jac_x[:n_parms + m, n_parms + m:]) 350 except np.linalg.LinAlgError: 351 raise Exception("Cannot invert hessian matrix.") 352 353 def odr_chisquare_compact_y(d): 354 model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) 355 chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) 356 return chisq 357 358 jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f))) 359 360 # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] 
using LAPACK dgesv 361 try: 362 deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms + m, n_parms + m:]) 363 except np.linalg.LinAlgError: 364 raise Exception("Cannot invert hessian matrix.") 365 366 result = [] 367 for i in range(n_parms): 368 result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i]))) 369 370 output.fit_parameters = result 371 372 output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) 373 output.dof = x.shape[-1] - n_parms 374 output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof) 375 376 return output 377 378 379def _prior_fit(x, y, func, priors, silent=False, **kwargs): 380 output = Fit_result() 381 382 output.fit_function = func 383 384 x = np.asarray(x) 385 386 if kwargs.get('num_grad') is True: 387 hessian = num_hessian 388 else: 389 hessian = auto_hessian 390 391 if not callable(func): 392 raise TypeError('func has to be a function.') 393 394 for i in range(100): 395 try: 396 func(np.arange(i), 0) 397 except TypeError: 398 continue 399 except IndexError: 400 continue 401 else: 402 break 403 else: 404 raise RuntimeError("Fit function is not valid.") 405 406 n_parms = i 407 408 if n_parms != len(priors): 409 raise Exception('Priors does not have the correct length.') 410 411 def extract_val_and_dval(string): 412 split_string = string.split('(') 413 if '.' in split_string[0] and '.' not in split_string[1][:-1]: 414 factor = 10 ** -len(split_string[0].partition('.')[2]) 415 else: 416 factor = 1 417 return float(split_string[0]), float(split_string[1][:-1]) * factor 418 419 loc_priors = [] 420 for i_n, i_prior in enumerate(priors): 421 if isinstance(i_prior, Obs): 422 loc_priors.append(i_prior) 423 else: 424 loc_val, loc_dval = extract_val_and_dval(i_prior) 425 loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}")) 426 427 output.priors = loc_priors 428 429 if not silent: 430 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 431 432 y_f = [o.value for o in y] 433 dy_f = [o.dvalue for o in y] 434 435 if np.any(np.asarray(dy_f) <= 0.0): 436 raise Exception('No y errors available, run the gamma method first.') 437 438 p_f = [o.value for o in loc_priors] 439 dp_f = [o.dvalue for o in loc_priors] 440 441 if np.any(np.asarray(dp_f) <= 0.0): 442 raise Exception('No prior errors available, run the gamma method first.') 443 444 if 'initial_guess' in kwargs: 445 x0 = kwargs.get('initial_guess') 446 if len(x0) != n_parms: 447 raise Exception('Initial guess does not have the correct length.') 448 else: 449 x0 = p_f 450 451 def chisqfunc(p): 452 model = func(p, x) 453 chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2) 454 return chisq 455 456 if not silent: 457 print('Method: migrad') 458 459 m = iminuit.Minuit(chisqfunc, x0) 460 m.errordef = 1 461 m.print_level = 0 462 if 'tol' in kwargs: 463 m.tol = kwargs.get('tol') 464 else: 465 m.tol = 1e-4 466 m.migrad() 467 params = np.asarray(m.values) 468 469 output.chisquare_by_dof = m.fval / len(x) 470 471 output.method = 'migrad' 472 473 if not silent: 474 print('chisquare/d.o.f.:', output.chisquare_by_dof) 475 476 if not m.fmin.is_valid: 477 raise Exception('The minimization procedure did not converge.') 478 479 hess = hessian(chisqfunc)(params) 480 hess_inv = np.linalg.pinv(hess) 481 482 def chisqfunc_compact(d): 
483 model = func(d[:n_parms], x) 484 chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2) 485 return chisq 486 487 jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f))) 488 489 deriv = -hess_inv @ jac_jac[:n_parms, n_parms:] 490 491 result = [] 492 for i in range(n_parms): 493 result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i]))) 494 495 output.fit_parameters = result 496 output.chisquare = chisqfunc(np.asarray(params)) 497 498 if kwargs.get('resplot') is True: 499 residual_plot(x, y, func, result) 500 501 if kwargs.get('qqplot') is True: 502 qqplot(x, y, func, result) 503 504 return output 505 506 507def _combined_fit(x, y, func, silent=False, **kwargs): 508 509 output = Fit_result() 510 511 if (type(x) == dict and type(y) == dict and type(func) == dict): 512 xd = x 513 yd = y 514 funcd = func 515 output.fit_function = func 516 elif (type(x) == dict or type(y) == dict or type(func) == dict): 517 raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.") 518 else: 519 x = np.asarray(x) 520 xd = {"": x} 521 yd = {"": y} 522 funcd = {"": func} 523 output.fit_function = func 524 525 if kwargs.get('num_grad') is True: 526 jacobian = num_jacobian 527 hessian = num_hessian 528 else: 529 jacobian = auto_jacobian 530 hessian = auto_hessian 531 532 key_ls = sorted(list(xd.keys())) 533 534 if sorted(list(yd.keys())) != key_ls: 535 raise Exception('x and y dictionaries do not contain the same keys.') 536 537 if sorted(list(funcd.keys())) != key_ls: 538 raise Exception('x and func dictionaries do not contain the same keys.') 539 540 x_all = np.concatenate([np.array(xd[key]) for key in key_ls]) 541 y_all = np.concatenate([np.array(yd[key]) for key in key_ls]) 542 543 y_f = [o.value for o in y_all] 544 dy_f = [o.dvalue for o in y_all] 545 546 if len(x_all.shape) > 2: 547 raise Exception('Unknown format for x values') 548 549 if np.any(np.asarray(dy_f) <= 0.0): 550 raise Exception('No y errors available, run the gamma method first.') 551 552 # number of fit parameters 553 n_parms_ls = [] 554 for key in key_ls: 555 if not callable(funcd[key]): 556 raise TypeError('func (key=' + key + ') is not a function.') 557 if len(xd[key]) != len(yd[key]): 558 raise Exception('x and y input (key=' + key + ') do not have the same length') 559 for i in range(100): 560 try: 561 funcd[key](np.arange(i), x_all.T[0]) 562 except TypeError: 563 continue 564 except IndexError: 565 continue 566 else: 567 break 568 else: 569 raise RuntimeError("Fit function (key=" + key + ") is not valid.") 570 n_parms = i 571 n_parms_ls.append(n_parms) 572 n_parms = max(n_parms_ls) 573 if not silent: 574 print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) 575 576 if 'initial_guess' in kwargs: 577 x0 = kwargs.get('initial_guess') 578 if len(x0) != n_parms: 579 raise Exception('Initial guess does not have the correct length: %d vs. 
%d' % (len(x0), n_parms)) 580 else: 581 x0 = [0.1] * n_parms 582 583 if kwargs.get('correlated_fit') is True: 584 corr = covariance(y_all, correlation=True, **kwargs) 585 covdiag = np.diag(1 / np.asarray(dy_f)) 586 condn = np.linalg.cond(corr) 587 if condn > 0.1 / np.finfo(float).eps: 588 raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})") 589 if condn > 1e13: 590 warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning) 591 chol = np.linalg.cholesky(corr) 592 chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True) 593 594 def chisqfunc_corr(p): 595 model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls]) 596 chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2) 597 return chisq 598 599 def chisqfunc(p): 600 func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls]) 601 model = anp.array([func_list[i](p, x_all[i]) for i in range(len(x_all))]) 602 chisq = anp.sum(((y_f - model) / dy_f) ** 2) 603 return chisq 604 605 output.method = kwargs.get('method', 'Levenberg-Marquardt') 606 if not silent: 607 print('Method:', output.method) 608 609 if output.method != 'Levenberg-Marquardt': 610 if output.method == 'migrad': 611 tolerance = 1e-4 # default value of 1e-1 set by iminuit can be problematic 612 if 'tol' in kwargs: 613 tolerance = kwargs.get('tol') 614 fit_result = iminuit.minimize(chisqfunc, x0, tol=tolerance) # Stopping criterion 0.002 * tol * errordef 615 if kwargs.get('correlated_fit') is True: 616 fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=tolerance) 617 output.iterations = fit_result.nfev 618 else: 619 tolerance = 1e-12 620 if 'tol' in kwargs: 621 tolerance = kwargs.get('tol') 622 fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=tolerance) 623 if kwargs.get('correlated_fit') is True: 624 fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=tolerance) 625 output.iterations = fit_result.nit 626 627 chisquare = fit_result.fun 628 629 else: 630 if kwargs.get('correlated_fit') is True: 631 def chisqfunc_residuals_corr(p): 632 model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls]) 633 chisq = anp.dot(chol_inv, (y_f - model)) 634 return chisq 635 636 def chisqfunc_residuals(p): 637 model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls]) 638 chisq = ((y_f - model) / dy_f) 639 return chisq 640 641 if 'tol' in kwargs: 642 print('tol cannot be set for Levenberg-Marquardt') 643 644 fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15) 645 if kwargs.get('correlated_fit') is True: 646 fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15) 647 648 chisquare = np.sum(fit_result.fun ** 2) 649 if kwargs.get('correlated_fit') is True: 650 assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14) 651 else: 652 assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14) 653 654 output.iterations = fit_result.nfev 655 656 if not fit_result.success: 657 raise Exception('The minimization procedure did not converge.') 658 659 if x_all.shape[-1] - n_parms > 0: 660 output.chisquare = chisquare 661 output.dof = x_all.shape[-1] - n_parms 662 output.chisquare_by_dof = output.chisquare / 
output.dof 663 output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof) 664 else: 665 output.chisquare_by_dof = float('nan') 666 667 output.message = fit_result.message 668 if not silent: 669 print(fit_result.message) 670 print('chisquare/d.o.f.:', output.chisquare_by_dof) 671 print('fit parameters', fit_result.x) 672 673 def prepare_hat_matrix(): 674 hat_vector = [] 675 for key in key_ls: 676 x_array = np.asarray(xd[key]) 677 if (len(x_array) != 0): 678 hat_vector.append(jacobian(funcd[key])(fit_result.x, x_array)) 679 hat_vector = [item for sublist in hat_vector for item in sublist] 680 return hat_vector 681 682 if kwargs.get('expected_chisquare') is True: 683 if kwargs.get('correlated_fit') is not True: 684 W = np.diag(1 / np.asarray(dy_f)) 685 cov = covariance(y_all) 686 hat_vector = prepare_hat_matrix() 687 A = W @ hat_vector 688 P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T 689 expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W) 690 output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare 691 if not silent: 692 print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare) 693 694 fitp = fit_result.x 695 if np.any(np.asarray(dy_f) <= 0.0): 696 raise Exception('No y errors available, run the gamma method first.') 697 698 try: 699 if kwargs.get('correlated_fit') is True: 700 hess = hessian(chisqfunc_corr)(fitp) 701 else: 702 hess = hessian(chisqfunc)(fitp) 703 except TypeError: 704 raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None 705 706 if kwargs.get('correlated_fit') is True: 707 def chisqfunc_compact(d): 708 func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls]) 709 model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))]) 710 chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2) 711 return chisq 712 else: 713 def chisqfunc_compact(d): 714 func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls]) 715 model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))]) 716 chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2) 717 return chisq 718 719 jac_jac_y = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f))) 720 721 # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv 722 try: 723 deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms, n_parms:]) 724 except np.linalg.LinAlgError: 725 raise Exception("Cannot invert hessian matrix.") 726 727 result = [] 728 for i in range(n_parms): 729 result.append(derived_observable(lambda x_all, **kwargs: (x_all[0] + np.finfo(np.float64).eps) / (y_all[0].value + np.finfo(np.float64).eps) * fitp[i], list(y_all), man_grad=list(deriv_y[i]))) 730 731 output.fit_parameters = result 732 733 # Hotelling t-squared p-value for correlated fits. 734 if kwargs.get('correlated_fit') is True: 735 n_cov = np.min(np.vectorize(lambda x_all: x_all.N)(y_all)) 736 output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare, 737 output.dof, n_cov - output.dof) 738 739 if kwargs.get('resplot') is True: 740 for key in key_ls: 741 residual_plot(xd[key], yd[key], funcd[key], result, title=key) 742 743 if kwargs.get('qqplot') is True: 744 for key in key_ls: 745 qqplot(xd[key], yd[key], funcd[key], result, title=key) 746 747 return output 748 749 750def fit_lin(x, y, **kwargs): 751 """Performs a linear fit to y = n + m * x and returns two Obs n, m. 
752 753 Parameters 754 ---------- 755 x : list 756 Can either be a list of floats in which case no xerror is assumed, or 757 a list of Obs, where the dvalues of the Obs are used as xerror for the fit. 758 y : list 759 List of Obs, the dvalues of the Obs are used as yerror for the fit. 760 761 Returns 762 ------- 763 fit_parameters : list[Obs] 764 LIist of fitted observables. 765 """ 766 767 def f(a, x): 768 y = a[0] + a[1] * x 769 return y 770 771 if all(isinstance(n, Obs) for n in x): 772 out = total_least_squares(x, y, f, **kwargs) 773 return out.fit_parameters 774 elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray): 775 out = least_squares(x, y, f, **kwargs) 776 return out.fit_parameters 777 else: 778 raise Exception('Unsupported types for x') 779 780 781def qqplot(x, o_y, func, p, title=""): 782 """Generates a quantile-quantile plot of the fit result which can be used to 783 check if the residuals of the fit are gaussian distributed. 784 785 Returns 786 ------- 787 None 788 """ 789 790 residuals = [] 791 for i_x, i_y in zip(x, o_y): 792 residuals.append((i_y - func(p, i_x)) / i_y.dvalue) 793 residuals = sorted(residuals) 794 my_y = [o.value for o in residuals] 795 probplot = scipy.stats.probplot(my_y) 796 my_x = probplot[0][0] 797 plt.figure(figsize=(8, 8 / 1.618)) 798 plt.errorbar(my_x, my_y, fmt='o') 799 fit_start = my_x[0] 800 fit_stop = my_x[-1] 801 samples = np.arange(fit_start, fit_stop, 0.01) 802 plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution') 803 plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)), marker='', ls='-') 804 805 plt.xlabel('Theoretical quantiles') 806 plt.ylabel('Ordered Values') 807 plt.legend(title=title) 808 plt.draw() 809 810 811def residual_plot(x, y, func, fit_res, title=""): 812 """Generates a plot which compares the fit to the data and displays the corresponding residuals 813 814 For uncorrelated data the residuals are expected to be distributed ~N(0,1). 
815 816 Returns 817 ------- 818 None 819 """ 820 sorted_x = sorted(x) 821 xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0]) 822 xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2]) 823 x_samples = np.arange(xstart, xstop + 0.01, 0.01) 824 825 plt.figure(figsize=(8, 8 / 1.618)) 826 gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0) 827 ax0 = plt.subplot(gs[0]) 828 ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data') 829 ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10, ls='-', ms=0) 830 ax0.set_xticklabels([]) 831 ax0.set_xlim([xstart, xstop]) 832 ax0.set_xticklabels([]) 833 ax0.legend(title=title) 834 835 residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y]) 836 ax1 = plt.subplot(gs[1]) 837 ax1.plot(x, residuals, 'ko', ls='none', markersize=5) 838 ax1.tick_params(direction='out') 839 ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True) 840 ax1.axhline(y=0.0, ls='--', color='k', marker=" ") 841 ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k') 842 ax1.set_xlim([xstart, xstop]) 843 ax1.set_ylabel('Residuals') 844 plt.subplots_adjust(wspace=None, hspace=None) 845 plt.draw() 846 847 848def error_band(x, func, beta): 849 """Calculate the error band for an array of sample values x, for given fit function func with optimized parameters beta. 850 851 Returns 852 ------- 853 err : np.array(Obs) 854 Error band for an array of sample values x 855 """ 856 cov = covariance(beta) 857 if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps): 858 warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning) 859 860 deriv = [] 861 for i, item in enumerate(x): 862 deriv.append(np.array(egrad(func)([o.value for o in beta], item))) 863 864 err = [] 865 for i, item in enumerate(x): 866 err.append(np.sqrt(deriv[i] @ cov @ deriv[i])) 867 err = np.array(err) 868 869 return err 870 871 872def ks_test(objects=None): 873 """Performs a Kolmogorov–Smirnov test for the p-values of all fit object. 874 875 Parameters 876 ---------- 877 objects : list 878 List of fit results to include in the analysis (optional). 879 880 Returns 881 ------- 882 None 883 """ 884 885 if objects is None: 886 obs_list = [] 887 for obj in gc.get_objects(): 888 if isinstance(obj, Fit_result): 889 obs_list.append(obj) 890 else: 891 obs_list = objects 892 893 p_values = [o.p_value for o in obs_list] 894 895 bins = len(p_values) 896 x = np.arange(0, 1.001, 0.001) 897 plt.plot(x, x, 'k', zorder=1) 898 plt.xlim(0, 1) 899 plt.ylim(0, 1) 900 plt.xlabel('p-value') 901 plt.ylabel('Cumulative probability') 902 plt.title(str(bins) + ' p-values') 903 904 n = np.arange(1, bins + 1) / np.float64(bins) 905 Xs = np.sort(p_values) 906 plt.step(Xs, n) 907 diffs = n - Xs 908 loc_max_diff = np.argmax(np.abs(diffs)) 909 loc = Xs[loc_max_diff] 910 plt.annotate('', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0)) 911 plt.draw() 912 913 print(scipy.stats.kstest(p_values, 'uniform'))
class Fit_result(Sequence)
Represents fit results.
Attributes
- fit_parameters (list): results for the individual fit parameters, also accessible via indices.
- chisquare_by_dof (float): reduced chisquare.
- p_value (float): p-value of the fit.
- t2_p_value (float): Hotelling t-squared p-value for correlated fits.
def gamma_method(self, **kwargs)
Apply the gamma method to all fit parameters
gm = gamma_method
Apply the gamma method to all fit parameters
Inherited Members
- collections.abc.Sequence: index, count
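The following minimal sketch shows how a `Fit_result` is typically used; it assumes `x`, `y` and `func` are defined as described for `least_squares` below.

```python
import pyerrors as pe

fit_res = pe.fits.least_squares(x, y, func)  # returns a Fit_result
fit_res.gamma_method()                       # propagate errors to all fit parameters (alias: fit_res.gm())
print(fit_res)                               # goodness-of-fit summary plus the parameters
a0 = fit_res[0]                              # individual parameters are accessible via indices
```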
def least_squares(x, y, func, priors=None, silent=False, **kwargs)
Performs a non-linear fit to y = func(x).
Parameters
- For an uncombined fit:
- x (list): list of floats.
- y (list): list of Obs.
- func (object): fit function, has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```
For multiple x values func can be of the form
```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.
- Or, for a combined fit:
- x (dict): dict of lists.
- y (dict): dict of lists of Obs.
- funcs (dict): dict of fit functions, which have to be of the form (here a[0] is the common fit parameter):

```python
import autograd.numpy as anp

funcs = {"a": func_a,
         "b": func_b}

def func_a(a, x):
    return a[1] * anp.exp(-a[0] * x)

def func_b(a, x):
    return a[2] * anp.exp(-a[0] * x)
```
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.
- priors (list, optional): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4)
- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters. In case of correlated fits the guess is used to perform an uncorrelated fit which then serves as guess for the correlated fit.
- method (str, optional): can be used to choose an alternative method for the minimization of chisquare. The possible methods are the ones which can be used for scipy.optimize.minimize and migrad of iminuit. If no method is specified, Levenberg-Marquardt is used. Reliable alternatives are migrad, Powell and Nelder-Mead.
- tol (float, optional): can be used (only for combined fits and methods other than Levenberg-Marquardt) to set the tolerance for convergence to a different value to either speed up convergence at the cost of a larger error on the fitted parameters (and possibly invalid estimates for parameter uncertainties) or smaller values to get more accurate parameter values. The stopping criterion depends on the method, e.g. migrad: edm_max = 0.002 * tol * errordef (EDM criterion: edm < edm_max)
- correlated_fit (bool): If True, use the full inverse covariance matrix in the definition of the chisquare cost function. For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`. In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix). This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning). At the moment this option only works for `priors=None` and when no `method` is given.
- expected_chisquare (bool): If True, estimates the expected chisquare which is corrected by effects caused by correlated input data (default False).
- resplot (bool): If True, a plot which displays fit, data and residuals is generated (default False).
- qqplot (bool): If True, a quantile-quantile plot of the fit result is generated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
Returns
- output (Fit_result): Parameters and information on the fitted result.
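A usage sketch covering the call patterns described above. The data are fabricated with `pseudo_Obs` purely for illustration; the model, parameter values and ensemble name `'ens1'` are arbitrary choices, not part of the API.

```python
import autograd.numpy as anp
import numpy as np
import pyerrors as pe

def func(a, x):
    return a[0] * anp.exp(-a[1] * x)

# synthetic data for illustration only
x = [1.0, 2.0, 3.0, 4.0]
y = [pe.pseudo_Obs(2.0 * np.exp(-0.3 * xi), 0.02, 'ens1') for xi in x]
[o.gamma_method() for o in y]   # y errors must be available before fitting

# uncombined fit
fit_res = pe.fits.least_squares(x, y, func, initial_guess=[1.0, 0.5])
fit_res.gamma_method()
print(fit_res)

# the same fit with Gaussian priors on the two parameters
prior_res = pe.fits.least_squares(x, y, func, priors=["2.0(5)", "0.3(2)"])
prior_res.gamma_method()

# combined fit: a[0] is shared between the two data sets
def func_a(a, x):
    return a[1] * anp.exp(-a[0] * x)

def func_b(a, x):
    return a[2] * anp.exp(-a[0] * x)

y_a = [pe.pseudo_Obs(1.5 * np.exp(-0.3 * xi), 0.02, 'ens1') for xi in x]
y_b = [pe.pseudo_Obs(0.8 * np.exp(-0.3 * xi), 0.01, 'ens1') for xi in x]
[o.gamma_method() for o in y_a + y_b]

combined_res = pe.fits.least_squares({"a": x, "b": x},
                                     {"a": y_a, "b": y_b},
                                     {"a": func_a, "b": func_b})
combined_res.gamma_method()
```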
def total_least_squares(x, y, func, silent=False, **kwargs)
Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
Parameters
- x (list): list of Obs, or a tuple of lists of Obs
- y (list): list of Obs. The dvalues of the Obs are used as x- and yerror for the fit.
- func (object): func has to be of the form

```python
import autograd.numpy as anp

def func(a, x):
    return a[0] + a[1] * x + a[2] * anp.sinh(x)
```
For multiple x values func can be of the form
```python
def func(a, x):
    (x1, x2) = x
    return a[0] * x1 ** 2 + a[1] * x2
```
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation will not work.
- silent (bool, optional): If true all output to the console is omitted (default False).
- initial_guess (list): can provide an initial guess for the input parameters. Relevant for non-linear fits with many parameters.
- expected_chisquare (bool): If true prints the expected chisquare which is corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False).
- num_grad (bool): Use numerical differentiation instead of automatic differentiation to perform the error propagation (default False).
Notes
Based on the orthogonal distance regression module of scipy.
Returns
- output (Fit_result): Parameters and information on the fitted result.
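A minimal sketch, assuming `ox` and `oy` are lists of `Obs` on which the gamma method has already been run so that both x- and y-errors are available.

```python
import autograd.numpy as anp
import pyerrors as pe

def lin(a, x):
    return a[0] + a[1] * x

out = pe.fits.total_least_squares(ox, oy, lin)  # ODR fit using x- and y-errors
out.gamma_method()
print(out)   # reports the residual variance instead of chisquare/d.o.f.
```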
def fit_lin(x, y, **kwargs)
Performs a linear fit to y = n + m * x and returns two Obs n, m.
Parameters
- x (list): Can either be a list of floats in which case no xerror is assumed, or a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
- y (list): List of Obs, the dvalues of the Obs are used as yerror for the fit.
Returns
- fit_parameters (list[Obs]): List of fitted observables.
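For example (a sketch; `y` is a list of `Obs` with errors and `ox` a list of `Obs` x-values):

```python
import pyerrors as pe

# plain-float x values are treated as exact -> least_squares backend
n, m = pe.fits.fit_lin([1, 2, 3, 4], y)
# with x given as Obs, the x errors are used as well -> total_least_squares backend
# n, m = pe.fits.fit_lin(ox, y)
```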
def qqplot(x, o_y, func, p, title="")
Generates a quantile-quantile plot of the fit result which can be used to check if the residuals of the fit are Gaussian distributed.
Returns
- None
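Typical usage, reusing the names from the `least_squares` example above (a sketch, not part of the fit output itself):

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.qqplot(x, y, func, fit_res.fit_parameters)
plt.show()  # qqplot only draws the figure; showing it is left to the caller
# the same plot is generated automatically by least_squares when called with qqplot=True
```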
def residual_plot(x, y, func, fit_res, title="")
Generates a plot which compares the fit to the data and displays the corresponding residuals.
For uncorrelated data the residuals are expected to be distributed ~N(0,1).
Returns
- None
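Usage mirrors `qqplot` (again a sketch with placeholder names):

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.residual_plot(x, y, func, fit_res.fit_parameters, title="example fit")
plt.show()
# equivalently, pass resplot=True to least_squares
```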
def error_band(x, func, beta)
Calculate the error band for an array of sample values x, for given fit function func with optimized parameters beta.
Returns
- err (np.array(Obs)): Error band for an array of sample values x
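A sketch of how the band is typically evaluated on a dense grid for plotting; it assumes `func` and `fit_res` from the `least_squares` example above and that `gamma_method` has been run on the fit parameters, so that their covariance is available. The grid boundaries are arbitrary illustration values.

```python
import numpy as np
import pyerrors as pe

x_grid = np.linspace(1.0, 4.0, 101)                           # assumed plotting range
band = pe.fits.error_band(x_grid, func, fit_res.fit_parameters)
central = np.asarray([func([p.value for p in fit_res.fit_parameters], xi) for xi in x_grid])
# central - band and central + band delimit the one-sigma band of the fitted curve
```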
def ks_test(objects=None)
Performs a Kolmogorov–Smirnov test for the p-values of all fit objects.
Parameters
- objects (list): List of fit results to include in the analysis (optional).
Returns
- None
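For example (a sketch; `res1`, `res2`, `res3` stand for `Fit_result` objects from earlier fits that carry a `p_value`):

```python
import matplotlib.pyplot as plt
import pyerrors as pe

pe.fits.ks_test([res1, res2, res3])   # explicit list of fit results
# pe.fits.ks_test()                   # or scan all Fit_result objects currently alive in memory
plt.show()
```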