This commit is contained in:
Fabian Joswig 2026-02-19 07:35:07 +01:00 committed by GitHub
commit 039b898820
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 105 additions and 40 deletions

View file

@ -7,7 +7,7 @@ import scipy.optimize
import scipy.stats import scipy.stats
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib import gridspec from matplotlib import gridspec
from scipy.odr import ODR, Model, RealData from odrpack import odr_fit
import iminuit import iminuit
from autograd import jacobian as auto_jacobian from autograd import jacobian as auto_jacobian
from autograd import hessian as auto_hessian from autograd import hessian as auto_hessian
@ -567,7 +567,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
Notes Notes
----- -----
Based on the orthogonal distance regression module of scipy. Based on the odrpack orthogonal distance regression library.
Returns Returns
------- -------
@ -634,17 +634,27 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
raise Exception('No y errors available, run the gamma method first.') raise Exception('No y errors available, run the gamma method first.')
if 'initial_guess' in kwargs: if 'initial_guess' in kwargs:
x0 = kwargs.get('initial_guess') x0 = np.asarray(kwargs.get('initial_guess'), dtype=np.float64)
if len(x0) != n_parms: if len(x0) != n_parms:
raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms)) raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
else: else:
x0 = [1] * n_parms x0 = np.ones(n_parms, dtype=np.float64)
data = RealData(x_f, y_f, sx=dx_f, sy=dy_f) # odrpack expects f(x, beta), but pyerrors convention is f(beta, x)
model = Model(func) def wrapped_func(x, beta):
odr = ODR(data, model, x0, partol=np.finfo(np.float64).eps) return func(beta, x)
odr.set_job(fit_type=0, deriv=1)
out = odr.run() out = odr_fit(
wrapped_func,
np.asarray(x_f, dtype=np.float64),
np.asarray(y_f, dtype=np.float64),
beta0=x0,
weight_x=1.0 / np.asarray(dx_f, dtype=np.float64) ** 2,
weight_y=1.0 / np.asarray(dy_f, dtype=np.float64) ** 2,
partol=np.finfo(np.float64).eps,
task='explicit-ODR',
diff_scheme='central'
)
output.residual_variance = out.res_var output.residual_variance = out.res_var
@ -652,14 +662,24 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
output.message = out.stopreason output.message = out.stopreason
output.xplus = out.xplus output.xplus = out.xplusd
if not silent: if not silent:
print('Method: ODR') print('Method: ODR')
print(*out.stopreason) print(out.stopreason)
print('Residual variance:', output.residual_variance) print('Residual variance:', output.residual_variance)
if out.info > 3: if not out.success:
# info % 5 gives the convergence status: 1=sum-of-sq, 2=param, 3=both
# If odrpack reports rank deficiency (e.g. vanishing chi-squared when
# n_obs == n_parms), convergence was still achieved allow with a warning.
if out.info % 5 in [1, 2, 3]:
warnings.warn(
"ODR fit is rank deficient. This may indicate a vanishing "
"chi-squared (n_obs == n_parms). Results may be unreliable.",
RuntimeWarning
)
else:
raise Exception('The minimization procedure did not converge.') raise Exception('The minimization procedure did not converge.')
m = x_f.size m = x_f.size
@ -679,9 +699,9 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
number_of_x_parameters = int(m / x_f.shape[-1]) number_of_x_parameters = int(m / x_f.shape[-1])
old_jac = jacobian(func)(out.beta, out.xplus) old_jac = jacobian(func)(out.beta, out.xplusd)
fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0))) fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplus, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0]))) fused_row2 = np.concatenate((jacobian(lambda x, y: func(y, x))(out.xplusd, out.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
new_jac = np.concatenate((fused_row1, fused_row2), axis=1) new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
A = W @ new_jac A = W @ new_jac
@ -690,14 +710,14 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
if expected_chisquare <= 0.0: if expected_chisquare <= 0.0:
warnings.warn("Negative expected_chisquare.", RuntimeWarning) warnings.warn("Negative expected_chisquare.", RuntimeWarning)
expected_chisquare = np.abs(expected_chisquare) expected_chisquare = np.abs(expected_chisquare)
output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) / expected_chisquare output.chisquare_by_expected_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplusd.ravel()))) / expected_chisquare
if not silent: if not silent:
print('chisquare/expected_chisquare:', print('chisquare/expected_chisquare:',
output.chisquare_by_expected_chisquare) output.chisquare_by_expected_chisquare)
fitp = out.beta fitp = out.beta
try: try:
hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplus.ravel()))) hess = hessian(odr_chisquare)(np.concatenate((fitp, out.xplusd.ravel())))
except TypeError: except TypeError:
raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
@ -706,7 +726,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
return chisq return chisq
jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel()))) jac_jac_x = hessian(odr_chisquare_compact_x)(np.concatenate((fitp, out.xplusd.ravel(), x_f.ravel())))
# Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv # Compute hess^{-1} @ jac_jac_x[:n_parms + m, n_parms + m:] using LAPACK dgesv
try: try:
@ -719,7 +739,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
return chisq return chisq
jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplus.ravel(), y_f))) jac_jac_y = hessian(odr_chisquare_compact_y)(np.concatenate((fitp, out.xplusd.ravel(), y_f)))
# Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
try: try:
@ -733,7 +753,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
output.fit_parameters = result output.fit_parameters = result
output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplusd.ravel())))
output.dof = x.shape[-1] - n_parms output.dof = x.shape[-1] - n_parms
output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof) output.p_value = 1 - scipy.stats.chi2.cdf(output.odr_chisquare, output.dof)

View file

@ -25,7 +25,7 @@ setup(name='pyerrors',
license="MIT", license="MIT",
packages=find_packages(), packages=find_packages(),
python_requires='>=3.10.0', python_requires='>=3.10.0',
install_requires=['numpy>=2.0', 'autograd>=1.7.0', 'numdifftools>=0.9.41', 'matplotlib>=3.9', 'scipy>=1.13', 'iminuit>=2.28', 'h5py>=3.11', 'lxml>=5.0', 'python-rapidjson>=1.20', 'pandas>=2.2'], install_requires=['numpy>=2.0', 'autograd>=1.7.0', 'numdifftools>=0.9.41', 'matplotlib>=3.9', 'scipy>=1.13', 'iminuit>=2.28', 'h5py>=3.11', 'lxml>=5.0', 'python-rapidjson>=1.20', 'pandas>=2.2', 'odrpack>=0.4'],
extras_require={'test': ['pytest', 'pytest-cov', 'pytest-benchmark', 'hypothesis', 'nbmake', 'flake8']}, extras_require={'test': ['pytest', 'pytest-cov', 'pytest-benchmark', 'hypothesis', 'nbmake', 'flake8']},
classifiers=[ classifiers=[
'Development Status :: 5 - Production/Stable', 'Development Status :: 5 - Production/Stable',

View file

@ -3,7 +3,7 @@ import autograd.numpy as anp
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import math import math
import scipy.optimize import scipy.optimize
from scipy.odr import ODR, Model, RealData from odrpack import odr_fit
from scipy.linalg import cholesky from scipy.linalg import cholesky
from scipy.stats import norm from scipy.stats import norm
import iminuit import iminuit
@ -397,11 +397,21 @@ def test_total_least_squares():
y = a[0] * anp.exp(-a[1] * x) y = a[0] * anp.exp(-a[1] * x)
return y return y
data = RealData([o.value for o in ox], [o.value for o in oy], sx=[o.dvalue for o in ox], sy=[o.dvalue for o in oy]) # odrpack expects f(x, beta), but pyerrors convention is f(beta, x)
model = Model(func) def wrapped_func(x, beta):
odr = ODR(data, model, [0, 0], partol=np.finfo(np.float64).eps) return func(beta, x)
odr.set_job(fit_type=0, deriv=1)
output = odr.run() output = odr_fit(
wrapped_func,
np.array([o.value for o in ox]),
np.array([o.value for o in oy]),
beta0=np.array([0.0, 0.0]),
weight_x=1.0 / np.array([o.dvalue for o in ox]) ** 2,
weight_y=1.0 / np.array([o.dvalue for o in oy]) ** 2,
partol=np.finfo(np.float64).eps,
task='explicit-ODR',
diff_scheme='central'
)
out = pe.total_least_squares(ox, oy, func, expected_chisquare=True) out = pe.total_least_squares(ox, oy, func, expected_chisquare=True)
beta = out.fit_parameters beta = out.fit_parameters
@ -458,6 +468,19 @@ def test_total_least_squares():
assert((outc.fit_parameters[1] - betac[1]).is_zero()) assert((outc.fit_parameters[1] - betac[1]).is_zero())
def test_total_least_squares_vanishing_chisquare():
"""Test that a saturated fit (n_obs == n_parms) works without exception."""
def func(a, x):
return a[0] + a[1] * x
x = [pe.pseudo_Obs(1.0, 0.1, 'x0'), pe.pseudo_Obs(2.0, 0.1, 'x1')]
y = [pe.pseudo_Obs(1.0, 0.1, 'y0'), pe.pseudo_Obs(2.0, 0.1, 'y1')]
with pytest.warns(RuntimeWarning, match="rank deficient"):
out = pe.total_least_squares(x, y, func, silent=True)
assert len(out.fit_parameters) == 2
def test_odr_derivatives(): def test_odr_derivatives():
x = [] x = []
y = [] y = []
@ -1431,11 +1454,11 @@ def fit_general(x, y, func, silent=False, **kwargs):
global print_output, beta0 global print_output, beta0
print_output = 1 print_output = 1
if 'initial_guess' in kwargs: if 'initial_guess' in kwargs:
beta0 = kwargs.get('initial_guess') beta0 = np.asarray(kwargs.get('initial_guess'), dtype=np.float64)
if len(beta0) != n_parms: if len(beta0) != n_parms:
raise Exception('Initial guess does not have the correct length.') raise Exception('Initial guess does not have the correct length.')
else: else:
beta0 = np.arange(n_parms) beta0 = np.arange(n_parms, dtype=np.float64)
if len(x) != len(y): if len(x) != len(y):
raise Exception('x and y have to have the same length') raise Exception('x and y have to have the same length')
@ -1463,23 +1486,45 @@ def fit_general(x, y, func, silent=False, **kwargs):
xerr = kwargs.get('xerr') xerr = kwargs.get('xerr')
# odrpack expects f(x, beta), but pyerrors convention is f(beta, x)
def wrapped_func(x, beta):
return func(beta, x)
if length == len(obs): if length == len(obs):
assert 'x_constants' in kwargs assert 'x_constants' in kwargs
data = RealData(kwargs.get('x_constants'), obs, sy=yerr) x_data = np.asarray(kwargs.get('x_constants'))
fit_type = 2 y_data = np.asarray(obs)
# Ordinary least squares (no x errors)
output = odr_fit(
wrapped_func,
x_data,
y_data,
beta0=beta0,
weight_y=1.0 / np.asarray(yerr) ** 2,
partol=np.finfo(np.float64).eps,
task='explicit-OLS',
diff_scheme='central'
)
elif length == len(obs) // 2: elif length == len(obs) // 2:
data = RealData(obs[:length], obs[length:], sx=xerr, sy=yerr) x_data = np.asarray(obs[:length])
fit_type = 0 y_data = np.asarray(obs[length:])
# ODR with x errors
output = odr_fit(
wrapped_func,
x_data,
y_data,
beta0=beta0,
weight_x=1.0 / np.asarray(xerr) ** 2,
weight_y=1.0 / np.asarray(yerr) ** 2,
partol=np.finfo(np.float64).eps,
task='explicit-ODR',
diff_scheme='central'
)
else: else:
raise Exception('x and y do not fit together.') raise Exception('x and y do not fit together.')
model = Model(func)
odr = ODR(data, model, beta0, partol=np.finfo(np.float64).eps)
odr.set_job(fit_type=fit_type, deriv=1)
output = odr.run()
if print_output and not silent: if print_output and not silent:
print(*output.stopreason) print(output.stopreason)
print('chisquare/d.o.f.:', output.res_var) print('chisquare/d.o.f.:', output.res_var)
print_output = 0 print_output = 0
beta0 = output.beta beta0 = output.beta