diff --git a/pyerrors/correlators.py b/pyerrors/correlators.py
index 67533cb2..c581fbe8 100644
--- a/pyerrors/correlators.py
+++ b/pyerrors/correlators.py
@@ -734,8 +734,8 @@ class Corr:
         if len(fitrange) != 2:
             raise Exception("fitrange has to have exactly two elements [fit_start, fit_stop]")
 
-        xs = [x for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None]
-        ys = [self.content[x][0] for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None]
+        xs = np.array([x for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None])
+        ys = np.array([self.content[x][0] for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None])
 
         result = least_squares(xs, ys, function, silent=silent, **kwargs)
         return result
diff --git a/pyerrors/fits.py b/pyerrors/fits.py
index fe4770fe..7f57c5df 100644
--- a/pyerrors/fits.py
+++ b/pyerrors/fits.py
@@ -168,13 +168,8 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
     '''
     if priors is not None:
        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
-
-    elif (type(x) == dict and type(y) == dict and type(func) == dict):
-        return _combined_fit(x, y, func, silent=silent, **kwargs)
-    elif (type(x) == dict or type(y) == dict or type(func) == dict):
-        raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.")
     else:
-        return _standard_fit(x, y, func, silent=silent, **kwargs)
+        return _combined_fit(x, y, func, silent=silent, **kwargs)
 
 
 def total_least_squares(x, y, func, silent=False, **kwargs):
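
Note on the dispatch change above: `least_squares` now funnels every call into `_combined_fit`, and the dict checks that used to live in the dispatcher move into that function. A minimal usage sketch of the two call styles that now share one code path (illustrative data, assuming pyerrors is imported as `pe`):

    import numpy as np
    import pyerrors as pe

    x = np.arange(5)
    y = [pe.pseudo_Obs(1.0 + 2.0 * v + np.random.normal(0, 0.1), 0.1, "ens") for v in x]
    func = lambda a, t: a[0] + a[1] * t

    fit_single = pe.least_squares(x, y, func)                         # standard fit
    fit_combined = pe.least_squares({"c": x}, {"c": y}, {"c": func})  # combined fit

That the two routes give identical results is what `test_combined_fit_vs_standard_fit` in the test diff below relies on.
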
@@ -509,204 +504,23 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs):
     return output
 
 
-def _standard_fit(x, y, func, silent=False, **kwargs):
-    output = Fit_result()
-
-    output.fit_function = func
-
-    x = np.asarray(x)
-
-    if kwargs.get('num_grad') is True:
-        jacobian = num_jacobian
-        hessian = num_hessian
-    else:
-        jacobian = auto_jacobian
-        hessian = auto_hessian
-
-    if x.shape[-1] != len(y):
-        raise Exception('x and y input have to have the same length')
-
-    if len(x.shape) > 2:
-        raise Exception('Unknown format for x values')
-
-    if not callable(func):
-        raise TypeError('func has to be a function.')
-
-    for i in range(42):
-        try:
-            func(np.arange(i), x.T[0])
-        except TypeError:
-            continue
-        except IndexError:
-            continue
-        else:
-            break
-    else:
-        raise RuntimeError("Fit function is not valid.")
-
-    n_parms = i
-
-    if not silent:
-        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
-
-    y_f = [o.value for o in y]
-    dy_f = [o.dvalue for o in y]
-
-    if np.any(np.asarray(dy_f) <= 0.0):
-        raise Exception('No y errors available, run the gamma method first.')
-
-    if 'initial_guess' in kwargs:
-        x0 = kwargs.get('initial_guess')
-        if len(x0) != n_parms:
-            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
-    else:
-        x0 = [0.1] * n_parms
-
-    if kwargs.get('correlated_fit') is True:
-        corr = covariance(y, correlation=True, **kwargs)
-        covdiag = np.diag(1 / np.asarray(dy_f))
-        condn = np.linalg.cond(corr)
-        if condn > 0.1 / np.finfo(float).eps:
-            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
-        if condn > 1e13:
-            warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
-        chol = np.linalg.cholesky(corr)
-        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
-
-        def chisqfunc_corr(p):
-            model = func(p, x)
-            chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
-            return chisq
-
-    def chisqfunc(p):
-        model = func(p, x)
-        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
-        return chisq
-
-    output.method = kwargs.get('method', 'Levenberg-Marquardt')
-    if not silent:
-        print('Method:', output.method)
-
-    if output.method != 'Levenberg-Marquardt':
-        if output.method == 'migrad':
-            fit_result = iminuit.minimize(chisqfunc, x0, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
-            if kwargs.get('correlated_fit') is True:
-                fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=1e-4)  # Stopping criterion 0.002 * tol * errordef
-            output.iterations = fit_result.nfev
-        else:
-            fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=1e-12)
-            if kwargs.get('correlated_fit') is True:
-                fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=1e-12)
-            output.iterations = fit_result.nit
-
-        chisquare = fit_result.fun
-
-    else:
-        if kwargs.get('correlated_fit') is True:
-            def chisqfunc_residuals_corr(p):
-                model = func(p, x)
-                chisq = anp.dot(chol_inv, (y_f - model))
-                return chisq
-
-        def chisqfunc_residuals(p):
-            model = func(p, x)
-            chisq = ((y_f - model) / dy_f)
-            return chisq
-
-        fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
-        if kwargs.get('correlated_fit') is True:
-            fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
-
-        chisquare = np.sum(fit_result.fun ** 2)
-        if kwargs.get('correlated_fit') is True:
-            assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
-        else:
-            assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
-
-        output.iterations = fit_result.nfev
-
-    if not fit_result.success:
-        raise Exception('The minimization procedure did not converge.')
-
-    if x.shape[-1] - n_parms > 0:
-        output.chisquare_by_dof = chisquare / (x.shape[-1] - n_parms)
-    else:
-        output.chisquare_by_dof = float('nan')
-
-    output.message = fit_result.message
-    if not silent:
-        print(fit_result.message)
-        print('chisquare/d.o.f.:', output.chisquare_by_dof)
-
-    if kwargs.get('expected_chisquare') is True:
-        if kwargs.get('correlated_fit') is not True:
-            W = np.diag(1 / np.asarray(dy_f))
-            cov = covariance(y)
-            A = W @ jacobian(func)(fit_result.x, x)
-            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
-            expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
-            output.chisquare_by_expected_chisquare = chisquare / expected_chisquare
-            if not silent:
-                print('chisquare/expected_chisquare:',
-                      output.chisquare_by_expected_chisquare)
-
-    fitp = fit_result.x
-    try:
-        if kwargs.get('correlated_fit') is True:
-            hess = hessian(chisqfunc_corr)(fitp)
-        else:
-            hess = hessian(chisqfunc)(fitp)
-    except TypeError:
-        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
-
-    if kwargs.get('correlated_fit') is True:
-        def chisqfunc_compact(d):
-            model = func(d[:n_parms], x)
-            chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
-            return chisq
-
-    else:
-        def chisqfunc_compact(d):
-            model = func(d[:n_parms], x)
-            chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
-            return chisq
-
-    jac_jac = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
-
-    # Compute hess^{-1} @ jac_jac[:n_parms, n_parms:] using LAPACK dgesv
-    try:
-        deriv = -scipy.linalg.solve(hess, jac_jac[:n_parms, n_parms:])
-    except np.linalg.LinAlgError:
-        raise Exception("Cannot invert hessian matrix.")
-
-    result = []
-    for i in range(n_parms):
-        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i])))
-
-    output.fit_parameters = result
-
-    output.chisquare = chisquare
-    output.dof = x.shape[-1] - n_parms
-    output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
-    # Hotelling t-squared p-value for correlated fits.
-    if kwargs.get('correlated_fit') is True:
-        n_cov = np.min(np.vectorize(lambda x: x.N)(y))
-        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
-                                                  output.dof, n_cov - output.dof)
-
-    if kwargs.get('resplot') is True:
-        residual_plot(x, y, func, result)
-
-    if kwargs.get('qqplot') is True:
-        qqplot(x, y, func, result)
-
-    return output
-
-
 def _combined_fit(x, y, func, silent=False, **kwargs):
     output = Fit_result()
-    output.fit_function = func
+
+    if (type(x) == dict and type(y) == dict and type(func) == dict):
+        xd = x
+        yd = y
+        funcd = func
+        output.fit_function = func
+    elif (type(x) == dict or type(y) == dict or type(func) == dict):
+        raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.")
+    else:
+        x = np.asarray(x)
+        xd = {"": x}
+        yd = {"": y}
+        funcd = {"": func}
+        output.fit_function = func
 
     if kwargs.get('num_grad') is True:
         jacobian = num_jacobian
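
The new block at the top of `_combined_fit` absorbs the removed `_standard_fit` entry point: non-dict inputs are wrapped into single-entry dictionaries keyed by the empty string, so every later loop over `key_ls` covers the standard fit for free. The idiom, restated as a hypothetical standalone helper (`_normalize_fit_input` is not part of pyerrors, purely an illustration):

    import numpy as np

    def _normalize_fit_input(x, y, func):
        # Already in combined-fit form: dicts sharing the same keys.
        if isinstance(x, dict) and isinstance(y, dict) and isinstance(func, dict):
            return x, y, func
        # Mixed dict and non-dict arguments are ambiguous and rejected.
        if isinstance(x, dict) or isinstance(y, dict) or isinstance(func, dict):
            raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.")
        # Plain inputs become a one-entry dict under the empty-string key.
        return {"": np.asarray(x)}, {"": y}, {"": func}

The empty-string key acts as a cheap sentinel: the standard-fit input reuses the dictionary machinery without any further special-casing downstream.
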
@@ -715,16 +529,16 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
         jacobian = auto_jacobian
         hessian = auto_hessian
 
-    key_ls = sorted(list(x.keys()))
+    key_ls = sorted(list(xd.keys()))
 
-    if sorted(list(y.keys())) != key_ls:
+    if sorted(list(yd.keys())) != key_ls:
         raise Exception('x and y dictionaries do not contain the same keys.')
 
-    if sorted(list(func.keys())) != key_ls:
+    if sorted(list(funcd.keys())) != key_ls:
         raise Exception('x and func dictionaries do not contain the same keys.')
 
-    x_all = np.concatenate([np.array(x[key]) for key in key_ls])
-    y_all = np.concatenate([np.array(y[key]) for key in key_ls])
+    x_all = np.concatenate([np.array(xd[key]) for key in key_ls])
+    y_all = np.concatenate([np.array(yd[key]) for key in key_ls])
 
     y_f = [o.value for o in y_all]
     dy_f = [o.dvalue for o in y_all]
@@ -738,13 +552,13 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     # number of fit parameters
     n_parms_ls = []
     for key in key_ls:
-        if not callable(func[key]):
+        if not callable(funcd[key]):
             raise TypeError('func (key=' + key + ') is not a function.')
-        if len(x[key]) != len(y[key]):
+        if len(xd[key]) != len(yd[key]):
             raise Exception('x and y input (key=' + key + ') do not have the same length')
         for i in range(100):
             try:
-                func[key](np.arange(i), x_all.T[0])
+                funcd[key](np.arange(i), x_all.T[0])
             except TypeError:
                 continue
             except IndexError:
@@ -778,12 +592,12 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
         chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
 
         def chisqfunc_corr(p):
-            model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
+            model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls])
             chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
             return chisq
 
     def chisqfunc(p):
-        func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
+        func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls])
         model = anp.array([func_list[i](p, x_all[i]) for i in range(len(x_all))])
         chisq = anp.sum(((y_f - model) / dy_f) ** 2)
         return chisq
@@ -815,12 +629,12 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     else:
         if kwargs.get('correlated_fit') is True:
             def chisqfunc_residuals_corr(p):
-                model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
+                model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls])
                 chisq = anp.dot(chol_inv, (y_f - model))
                 return chisq
 
         def chisqfunc_residuals(p):
-            model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
+            model = np.concatenate([np.array(funcd[key](p, np.asarray(xd[key]))).reshape(-1) for key in key_ls])
            chisq = ((y_f - model) / dy_f)
             return chisq
 
@@ -859,9 +673,9 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     def prepare_hat_matrix():
         hat_vector = []
         for key in key_ls:
-            x_array = np.asarray(x[key])
+            x_array = np.asarray(xd[key])
             if (len(x_array) != 0):
-                hat_vector.append(jacobian(func[key])(fit_result.x, x_array))
+                hat_vector.append(jacobian(funcd[key])(fit_result.x, x_array))
         hat_vector = [item for sublist in hat_vector for item in sublist]
         return hat_vector
 
@@ -870,7 +684,7 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
             W = np.diag(1 / np.asarray(dy_f))
             cov = covariance(y_all)
             hat_vector = prepare_hat_matrix()
-            A = W @ hat_vector  # hat_vector = 'jacobian(func)(fit_result.x, x)'
+            A = W @ hat_vector
             P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
             expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W)
             output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare
@@ -891,13 +705,13 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
 
     if kwargs.get('correlated_fit') is True:
         def chisqfunc_compact(d):
-            func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
+            func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls])
             model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
             chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
             return chisq
     else:
         def chisqfunc_compact(d):
-            func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
+            func_list = np.concatenate([[funcd[k]] * len(xd[k]) for k in key_ls])
             model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
             chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
             return chisq
@@ -916,11 +730,20 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
 
     output.fit_parameters = result
 
+    # Hotelling t-squared p-value for correlated fits.
     if kwargs.get('correlated_fit') is True:
         n_cov = np.min(np.vectorize(lambda x_all: x_all.N)(y_all))
         output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
                                                   output.dof, n_cov - output.dof)
 
+    if kwargs.get('resplot') is True:
+        for key in key_ls:
+            residual_plot(xd[key], yd[key], funcd[key], result, title=key)
+
+    if kwargs.get('qqplot') is True:
+        for key in key_ls:
+            qqplot(xd[key], yd[key], funcd[key], result, title=key)
+
     return output
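
With plain inputs normalized to a single key, the `resplot` and `qqplot` hooks above now emit one figure per dictionary key and pass the key along as the plot title. A sketch in the spirit of the new test below (names illustrative):

    import numpy as np
    import pyerrors as pe

    x = np.arange(3)
    yd = {"lin": [pe.pseudo_Obs(2 * v + np.random.normal(0, 0.1), 0.1, "test") for v in x],
          "quad": [pe.pseudo_Obs(3 * v ** 2 + np.random.normal(0, 0.1), 0.1, "test") for v in x]}
    fd = {"lin": lambda a, t: a[0] + a[1] * t,
          "quad": lambda a, t: a[0] + a[2] * t ** 2}

    # One residual plot and one Q-Q plot per key, titled "lin" and "quad".
    fit = pe.least_squares({"lin": x, "quad": x}, yd, fd, resplot=True, qqplot=True)
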
@@ -955,7 +778,7 @@ def fit_lin(x, y, **kwargs):
         raise Exception('Unsupported types for x')
 
 
-def qqplot(x, o_y, func, p):
+def qqplot(x, o_y, func, p, title=""):
     """Generates a quantile-quantile plot of the fit result which can be used to
        check if the residuals of the fit are gaussian distributed.
 
@@ -981,13 +804,15 @@ def qqplot(x, o_y, func, p):
 
     plt.xlabel('Theoretical quantiles')
     plt.ylabel('Ordered Values')
-    plt.legend()
+    plt.legend(title=title)
     plt.draw()
 
 
-def residual_plot(x, y, func, fit_res):
+def residual_plot(x, y, func, fit_res, title=""):
     """Generates a plot which compares the fit to the data and displays the corresponding residuals
 
+    For uncorrelated data the residuals are expected to be distributed ~N(0,1).
+
     Returns
     -------
     None
@@ -1005,7 +830,7 @@ def residual_plot(x, y, func, fit_res):
     ax0.set_xticklabels([])
     ax0.set_xlim([xstart, xstop])
     ax0.set_xticklabels([])
-    ax0.legend()
+    ax0.legend(title=title)
 
     residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
     ax1 = plt.subplot(gs[1])
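
The `title` forwarding relies on matplotlib's `legend(title=...)`, so the two public helpers stay usable on their own; the keyword defaults to an empty string and existing call sites are unaffected. A hedged example of calling them directly (argument names follow the signatures above):

    import numpy as np
    import pyerrors as pe
    from pyerrors.fits import qqplot, residual_plot

    x = np.arange(4)
    y = [pe.pseudo_Obs(0.5 + 1.5 * v + np.random.normal(0, 0.1), 0.1, "demo") for v in x]
    func = lambda a, t: a[0] + a[1] * t
    fit = pe.least_squares(x, y, func)

    residual_plot(x, y, func, fit.fit_parameters, title="demo")
    qqplot(x, y, func, fit.fit_parameters, title="demo")
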
diff --git a/tests/fits_test.py b/tests/fits_test.py
index 2d849370..d596d89e 100644
--- a/tests/fits_test.py
+++ b/tests/fits_test.py
@@ -1,5 +1,6 @@
 import numpy as np
 import autograd.numpy as anp
+import matplotlib.pyplot as plt
 import math
 import scipy.optimize
 from scipy.odr import ODR, Model, RealData
@@ -618,7 +619,7 @@ def test_combined_fit_vs_standard_fit():
         [item.gamma_method() for item in y_const[key]]
     y_const_ls = np.concatenate([np.array(o) for o in y_const.values()])
     x_const_ls = np.arange(0, 20)
-    
+
     def func_const(a,x):
         return 0 * x + a[0]
 
@@ -633,6 +634,7 @@ def test_combined_fit_vs_standard_fit():
     assert np.isclose(0.0, (res[0].p_value - res[1].p_value), 1e-14, 1e-8)
     assert (res[0][0] - res[1][0]).is_zero(atol=1e-8)
 
+
 def test_combined_fit_no_autograd():
 
     def func_exp1(x):
@@ -663,6 +665,7 @@ def test_combined_fit_no_autograd():
 
     pe.least_squares(xs, ys, funcs, num_grad=True)
 
+
 def test_combined_fit_invalid_fit_functions():
     def func1(a, x):
         return a[0] + a[1] * x + a[2] * anp.sinh(x) + a[199]
@@ -692,6 +695,7 @@ def test_combined_fit_invalid_fit_functions():
     with pytest.raises(Exception):
         pe.least_squares({'a':xvals, 'b':xvals}, {'a':yvals, 'b':yvals}, {'a':func_valid, 'b':func})
 
+
 def test_combined_fit_invalid_input():
     xvals =[]
     yvals =[]
@@ -706,6 +710,7 @@ def test_combined_fit_invalid_input():
     with pytest.raises(Exception):
         pe.least_squares({'a':xvals}, {'a':yvals}, {'a':func_valid})
 
+
 def test_combined_fit_no_autograd():
 
     def func_exp1(x):
@@ -774,6 +779,7 @@ def test_combined_fit_num_grad():
     assert(num[0] == auto[0])
     assert(num[1] == auto[1])
 
+
 def test_combined_fit_dictkeys_no_order():
     def func_exp1(x):
         return 0.3*np.exp(0.5*x)
@@ -835,6 +841,7 @@ def test_combined_fit_dictkeys_no_order():
     assert(no_order_x_y[0] == order[0])
     assert(no_order_x_y[1] == order[1])
 
+
 def test_correlated_combined_fit_vs_correlated_standard_fit():
     x_const = {'a':[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                'b':np.arange(10, 20)}
@@ -861,6 +868,7 @@ def test_correlated_combined_fit_vs_correlated_standard_fit():
     assert np.isclose(0.0, (res[0].t2_p_value - res[1].t2_p_value), 1e-14, 1e-8)
     assert (res[0][0] - res[1][0]).is_zero(atol=1e-8)
 
+
 def test_combined_fit_hotelling_t():
     xvals_b = np.arange(0,6)
     xvals_a = np.arange(0,8)
@@ -888,6 +896,23 @@ def test_combined_fit_hotelling_t():
     ft = pe.fits.least_squares(xs, ys, funcs, correlated_fit=True)
     assert ft.t2_p_value >= ft.p_value
 
+
+def test_combined_resplot_qqplot():
+    x = np.arange(3)
+    y1 = [pe.pseudo_Obs(2 * o + np.random.normal(0, 0.1), 0.1, "test") for o in x]
+    y2 = [pe.pseudo_Obs(3 * o ** 2 + np.random.normal(0, 0.1), 0.1, "test") for o in x]
+    fr = pe.least_squares(x, y1, lambda a, x: a[0] + a[1] * x, resplot=True, qqplot=True)
+
+    xd = {"1": x,
+          "2": x}
+    yd = {"1": y1,
+          "2": y2}
+    fd = {"1": lambda a, x: a[0] + a[1] * x,
+          "2": lambda a, x: a[0] + a[2] * x ** 2}
+    fr = pe.least_squares(xd, yd, fd, resplot=True, qqplot=True)
+    plt.close('all')
+
+
 def fit_general(x, y, func, silent=False, **kwargs):
     """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.