fix/tests: Combined fit now also works when the keys of the x, y & func input dictionaries are not in the same order; build: performance improvements

ppetrak 2023-01-30 14:26:47 +01:00
parent 32dcb7438c
commit 80371a0898
2 changed files with 129 additions and 35 deletions

pyerrors/fits.py

@@ -102,9 +102,6 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
     OR For a combined fit:
-    Do not need to use ordered dictionaries: python version >= 3.7: Dictionary order is guaranteed to be insertion order.
-    (https://docs.python.org/3/library/stdtypes.html#dict-views) Ensures that x, y and func values are mapped correctly.
     x : dict
         dict of lists.
     y : dict
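For orientation, this is the input format the docstring describes. A minimal sketch (function names and numbers are illustrative, not taken from the commit): two datasets share the amplitude a[0], while each has its own exponent, and the dictionary keys pair x values, observables and model functions.

    import numpy as np
    import pyerrors as pe

    def func_a(a, x):
        return a[0] * np.exp(a[1] * x)

    def func_b(a, x):
        return a[0] * np.exp(a[2] * x)

    xs = {'a': np.arange(0, 8), 'b': np.arange(0, 6)}
    # one pe.Obs per x value; toy data generated around 0.3 * exp(0.5 * x)
    ys = {key: [pe.pseudo_Obs(0.3 * np.exp(0.5 * xv), 0.05, 'ens1') for xv in xs[key]]
          for key in xs}
    funcs = {'a': func_a, 'b': func_b}

    res = pe.least_squares(xs, ys, funcs, num_grad=True)  # np.exp requires numerical gradients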
@@ -142,7 +139,10 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
         migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
         Reliable alternatives are migrad, Powell and Nelder-Mead.
     tol: float, optional
-        can only be used for combined fits and methods other than Levenberg-Marquardt
+        can be used (only for combined fits and methods other than Levenberg-Marquardt) to set the tolerance for convergence:
+        larger values speed up convergence at the cost of a larger error on the fitted parameters (and possibly invalid
+        estimates for the parameter uncertainties), smaller values yield more accurate parameter values.
+        The stopping criterion depends on the method, e.g. migrad: edm_max = 0.002 * tol * errordef (EDM criterion: edm < edm_max)
     correlated_fit : bool
         If True, use the full inverse covariance matrix in the definition of the chisquare cost function.
        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
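A hedged usage sketch of the new `tol` keyword, reusing the hypothetical xs/ys/funcs from the example above:

    # Tighter tolerance: slower, but more accurate parameters
    # (migrad only stops once edm < 0.002 * tol * errordef).
    res_tight = pe.least_squares(xs, ys, funcs, num_grad=True, method='migrad', tol=1e-4)

    # Looser tolerance: faster convergence at the cost of larger
    # (and possibly invalid) parameter uncertainties.
    res_loose = pe.least_squares(xs, ys, funcs, num_grad=True, method='migrad', tol=1.0)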
@@ -705,8 +705,19 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
         jacobian = auto_jacobian
         hessian = auto_hessian

-    x_all = np.concatenate([np.array(o) for o in x.values()])
-    y_all = np.concatenate([np.array(o) for o in y.values()])
+    key_ls = sorted(list(x.keys()))
+
+    if sorted(list(y.keys())) != key_ls:
+        raise Exception('x and y dictionaries do not contain the same keys.')
+    if sorted(list(func.keys())) != key_ls:
+        raise Exception('x and func dictionaries do not contain the same keys.')
+    if sorted(list(func.keys())) != sorted(list(y.keys())):
+        raise Exception('y and func dictionaries do not contain the same keys.')
+
+    x_all = np.concatenate([np.array(x[key]) for key in key_ls])
+    y_all = np.concatenate([np.array(y[key]) for key in key_ls])

     y_f = [o.value for o in y_all]
     dy_f = [o.dvalue for o in y_all]
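This hunk is the fix itself: instead of concatenating `x.values()` and `y.values()` (which silently depends on insertion order), the keys are sorted once and every concatenation follows that list, and mismatched key sets now fail fast. A standalone sketch of the mechanism:

    import numpy as np

    x = {'b': np.arange(6), 'a': np.arange(8)}      # insertion order b, a
    y = {'a': list(range(8)), 'b': list(range(6))}  # insertion order a, b

    key_ls = sorted(list(x.keys()))
    assert sorted(list(y.keys())) == key_ls  # unequal key sets raise in _combined_fit

    # Concatenating along the sorted key list keeps x_all and y_all aligned,
    # regardless of the order in which the dictionaries were built:
    x_all = np.concatenate([np.array(x[key]) for key in key_ls])
    y_all = np.concatenate([np.array(y[key]) for key in key_ls])
    assert len(x_all) == len(y_all) == 14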
@@ -716,12 +727,12 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     # number of fit parameters
     n_parms_ls = []
-    for key in func.keys():
+    for key in key_ls:
         if not callable(func[key]):
             raise TypeError('func (key=' + key + ') is not a function.')
         if len(x[key]) != len(y[key]):
             raise Exception('x and y input (key=' + key + ') do not have the same length')
-        for i in range(42):
+        for i in range(100):
             try:
                 func[key](np.arange(i), x_all.T[0])
             except TypeError:
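Context for the `range(42)` to `range(100)` change: the number of fit parameters is inferred by calling each function with progressively longer parameter vectors until a call succeeds, so the range bound caps how many parameters a fit function may take. A standalone sketch of the idea (the hunk above is truncated; presumably IndexError is caught alongside TypeError, as in the non-combined fit):

    import numpy as np

    def func(a, x):  # hypothetical fit function with three parameters
        return a[0] + a[1] * x + a[2] * x ** 2

    for i in range(100):
        try:
            func(np.arange(i), 0.0)
        except (IndexError, TypeError):
            continue
        else:
            break  # first i that works is the parameter count
    print(i)  # 3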
@@ -746,15 +757,9 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     x0 = [0.1] * n_parms

     def chisqfunc(p):
-        chisq = 0.0
-        for key in func.keys():
-            x_array = np.asarray(x[key])
-            model = anp.array(func[key](p, x_array))
-            y_obs = y[key]
-            y_f = [o.value for o in y_obs]
-            dy_f = [o.dvalue for o in y_obs]
-            C_inv = np.diag(np.diag(np.ones((len(x_array), len(x_array))))) / dy_f / dy_f
-            chisq += anp.sum((y_f - model) @ C_inv @ (y_f - model))
+        func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
+        model = anp.array([func_list[i](p, x_all[i]) for i in range(len(x_all))])
+        chisq = anp.sum(((y_f - model) / dy_f) ** 2)
         return chisq

     output.method = kwargs.get('method', 'Levenberg-Marquardt')
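The rewritten chisqfunc is the performance part of the commit: the per-key Python loop that rebuilt a diagonal weight matrix on every evaluation is replaced by one flat model evaluation against the precomputed y_f and dy_f. A sketch of the flattening trick with toy functions:

    import numpy as np

    def fa(p, x):
        return p[0] + p[1] * x

    def fb(p, x):
        return p[0] * x

    x = {'a': np.arange(3), 'b': np.arange(2)}
    func = {'a': fa, 'b': fb}
    key_ls = sorted(x.keys())

    # Repeat each key's function once per data point, then evaluate point-wise:
    func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
    x_all = np.concatenate([np.array(x[k]) for k in key_ls])
    p = np.array([1.0, 2.0])
    model = np.array([func_list[i](p, x_all[i]) for i in range(len(x_all))])
    print(model)  # [1. 3. 5. 0. 1.]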
@@ -763,7 +768,7 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     if output.method != 'Levenberg-Marquardt':
         if output.method == 'migrad':
-            tolerance = 1e-4
+            tolerance = 1e-1  # default value set by iminuit
             if 'tol' in kwargs:
                 tolerance = kwargs.get('tol')
             fit_result = iminuit.minimize(chisqfunc, x0, tol=tolerance)  # Stopping criterion 0.002 * tol * errordef
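On this change: the previously hard-coded 1e-4 made migrad run with a much stricter criterion than iminuit's own default of 0.1. Assuming errordef = 1 (the usual convention for a chi-square cost), the thresholds work out as follows (illustrative arithmetic only):

    def edm_max(tol, errordef=1.0):
        return 0.002 * tol * errordef  # migrad stops once edm < edm_max

    print(edm_max(1e-1))  # 2e-4, the iminuit default
    print(edm_max(1e-4))  # 2e-7, the previously hard-coded value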
@@ -779,7 +784,7 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
     else:

         def chisqfunc_residuals(p):
-            model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in func.keys()])
+            model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
             chisq = ((y_f - model) / dy_f)
             return chisq

         if 'tol' in kwargs:
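For comparison, the Levenberg-Marquardt branch works on the residual vector rather than the scalar chi-square, since scipy-style least-squares drivers square and sum the residuals internally. A minimal standalone sketch of that formulation (toy data, not from the commit):

    import numpy as np
    from scipy.optimize import least_squares

    x_pts = np.array([0.0, 1.0, 2.0])
    y_f = np.array([1.0, 2.1, 2.9])
    dy_f = np.array([0.1, 0.1, 0.2])

    def chisqfunc_residuals(p):
        model = p[0] + p[1] * x_pts
        return (y_f - model) / dy_f  # vector of normalised residuals, not a scalar

    fit_result = least_squares(chisqfunc_residuals, x0=[0.1, 0.1], method='lm')
    print(fit_result.x)  # best-fit intercept and slope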
@@ -809,25 +814,14 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
         print('fit parameters', fit_result.x)

     def chisqfunc_compact(d):
-        chisq = 0.0
-        list_tmp = []
-        c1 = 0
-        c2 = 0
-        for key in func.keys():
-            x_array = np.asarray(x[key])
-            c2 += len(x_array)
-            model = anp.array(func[key](d[:n_parms], x_array))
-            y_obs = y[key]
-            dy_f = [o.dvalue for o in y_obs]
-            C_inv = np.diag(np.diag(np.ones((len(x_array), len(x_array))))) / dy_f / dy_f
-            list_tmp.append(anp.sum((d[n_parms + c1:n_parms + c2] - model) @ C_inv @ (d[n_parms + c1:n_parms + c2] - model)))
-            c1 += len(x_array)
-        chisq = anp.sum(list_tmp)
+        func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
+        model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
+        chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
         return chisq

     def prepare_hat_matrix():
         hat_vector = []
-        for key in func.keys():
+        for key in key_ls:
             x_array = np.asarray(x[key])
             if (len(x_array) != 0):
                 hat_vector.append(jacobian(func[key])(fit_result.x, x_array))
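The same rewrite is applied to chisqfunc_compact, which expresses the chi-square as a function of parameters and data stacked into one vector d so that derivatives with respect to the data can be taken for error propagation. For an uncorrelated fit the dropped quadratic form with a diagonal weight matrix equals the flat normalised-residual sum, e.g.:

    import numpy as np

    r = np.array([0.4, -1.2, 0.7])   # toy residuals, standing in for d[n_parms:] - model
    dy = np.array([0.1, 0.2, 0.3])   # toy errors, standing in for dy_f

    C_inv = np.diag(1.0 / dy ** 2)   # diagonal inverse covariance
    assert np.isclose(r @ C_inv @ r, np.sum((r / dy) ** 2))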

tests/fits_test.py

@@ -618,7 +618,7 @@ def test_combined_fit_vs_standard_fit():
         [item.gamma_method() for item in y_const[key]]
     y_const_ls = np.concatenate([np.array(o) for o in y_const.values()])
     x_const_ls = np.arange(0, 20)

     def func_const(a, x):
         return 0 * x + a[0]
@@ -633,6 +633,35 @@ def test_combined_fit_vs_standard_fit():
     assert np.isclose(0.0, (res[0].p_value - res[1].p_value), 1e-14, 1e-8)
     assert (res[0][0] - res[1][0]).is_zero(atol=1e-8)

+
+def test_combined_fit_no_autograd():
+    def func_exp1(x):
+        return 0.3*np.exp(0.5*x)
+
+    def func_exp2(x):
+        return 0.3*np.exp(0.8*x)
+
+    xvals_b = np.arange(0, 6)
+    xvals_a = np.arange(0, 8)
+
+    def func_a(a, x):
+        return a[0]*np.exp(a[1]*x)
+
+    def func_b(a, x):
+        return a[0]*np.exp(a[2]*x)
+
+    funcs = {'a': func_a, 'b': func_b}
+    xs = {'a': xvals_a, 'b': xvals_b}
+    ys = {'a': [pe.Obs([np.random.normal(item, item*1.5, 1000)], ['ensemble1']) for item in func_exp1(xvals_a)],
+          'b': [pe.Obs([np.random.normal(item, item*1.4, 1000)], ['ensemble1']) for item in func_exp2(xvals_b)]}
+
+    for key in funcs.keys():
+        [item.gamma_method() for item in ys[key]]
+
+    with pytest.raises(Exception):
+        pe.least_squares(xs, ys, funcs)
+
+    pe.least_squares(xs, ys, funcs, num_grad=True)
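This test pins down the behaviour for fit functions written with plain numpy: autograd cannot differentiate np.exp, so the default analytic-derivative path raises and num_grad=True is required. Writing the model with autograd's numpy wrapper avoids this, as the dictkeys test further down does:

    import autograd.numpy as anp

    def func_a(a, x):
        return a[0] * anp.exp(a[1] * x)  # differentiable by autograd, no num_grad needed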
 def test_combined_fit_invalid_fit_functions():
     def func1(a, x):
@@ -663,6 +692,17 @@ def test_combined_fit_invalid_fit_functions():
     with pytest.raises(Exception):
         pe.least_squares({'a': xvals, 'b': xvals}, {'a': yvals, 'b': yvals}, {'a': func_valid, 'b': func})

+
+def test_combined_fit_invalid_input():
+    xvals = []
+    yvals = []
+    err = 0.1
+
+    def func_valid(a, x):
+        return a[0] + a[1] * x
+    for x in range(1, 8, 2):
+        xvals.append(x)
+        yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))
+
+    with pytest.raises(Exception):
+        pe.least_squares({'a': xvals}, {'b': yvals}, {'a': func_valid})
+

 def test_combined_fit_no_autograd():
@@ -732,6 +772,66 @@ def test_combined_fit_num_grad():
     assert(num[0] == auto[0])
     assert(num[1] == auto[1])

+
+def test_combined_fit_dictkeys_no_order():
+    def func_exp1(x):
+        return 0.3*np.exp(0.5*x)
+
+    def func_exp2(x):
+        return 0.3*np.exp(0.8*x)
+
+    xvals_b = np.arange(0, 6)
+    xvals_a = np.arange(0, 8)
+
+    def func_num_a(a, x):
+        return a[0]*np.exp(a[1]*x)
+
+    def func_num_b(a, x):
+        return a[0]*np.exp(a[2]*x)
+
+    def func_auto_a(a, x):
+        return a[0]*anp.exp(a[1]*x)
+
+    def func_auto_b(a, x):
+        return a[0]*anp.exp(a[2]*x)
+
+    funcs = {'a': func_auto_a, 'b': func_auto_b}
+    funcs_no_order = {'b': func_auto_b, 'a': func_auto_a}
+    xs = {'a': xvals_a, 'b': xvals_b}
+    xs_no_order = {'b': xvals_b, 'a': xvals_a}
+    yobs_a = [pe.Obs([np.random.normal(item, item*1.5, 1000)], ['ensemble1']) for item in func_exp1(xvals_a)]
+    yobs_b = [pe.Obs([np.random.normal(item, item*1.4, 1000)], ['ensemble1']) for item in func_exp2(xvals_b)]
+    ys = {'a': yobs_a, 'b': yobs_b}
+    ys_no_order = {'b': yobs_b, 'a': yobs_a}
+
+    for key in funcs.keys():
+        [item.gamma_method() for item in ys[key]]
+        [item.gamma_method() for item in ys_no_order[key]]
+
+    for method_kw in ['Levenberg-Marquardt', 'migrad', 'Powell', 'Nelder-Mead']:
+        order = pe.fits.least_squares(xs, ys, funcs, method=method_kw)
+        no_order_func = pe.fits.least_squares(xs, ys, funcs_no_order, method=method_kw)
+        no_order_x = pe.fits.least_squares(xs_no_order, ys, funcs, method=method_kw)
+        no_order_y = pe.fits.least_squares(xs, ys_no_order, funcs, method=method_kw)
+        no_order_func_x = pe.fits.least_squares(xs_no_order, ys, funcs_no_order, method=method_kw)
+        no_order_func_y = pe.fits.least_squares(xs, ys_no_order, funcs_no_order, method=method_kw)
+        no_order_x_y = pe.fits.least_squares(xs_no_order, ys_no_order, funcs, method=method_kw)
+
+        assert(no_order_func[0] == order[0])
+        assert(no_order_func[1] == order[1])
+
+        assert(no_order_x[0] == order[0])
+        assert(no_order_x[1] == order[1])
+
+        assert(no_order_y[0] == order[0])
+        assert(no_order_y[1] == order[1])
+
+        assert(no_order_func_x[0] == order[0])
+        assert(no_order_func_x[1] == order[1])
+
+        assert(no_order_func_y[0] == order[0])
+        assert(no_order_func_y[1] == order[1])
+
+        assert(no_order_x_y[0] == order[0])
+        assert(no_order_x_y[1] == order[1])
+

 def fit_general(x, y, func, silent=False, **kwargs):
     """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.