From 839d9214ed8d520501f21b2438fb24118f219241 Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Tue, 7 Mar 2023 16:15:16 +0000
Subject: [PATCH] Improved prior fit (#161)

* refactor: merged combined fit and prior fit without breaking the routine. Fitting with priors does not work yet.

* refactor: correlated fits without priors work now.

* refactor: prior error propagation and dof fixed.

* refactor: old prior fit implementation moved to tests.

* refactor: moved _extract_val_and_dval out of least_squares.

* refactor: comment removed.

* tests: additional tests and exceptions added.

* tests: test for constrained prior fit added.

* docs: least_squares docstring extended.

* fix: linting errors fixed.

* feat: additional if clause for fits without priors added to achieve original speed.

* tests: test_constrained_and_prior_fit fixed.

* fix: fix array cast of least_squares dict mode.

* tests: test for lists in dict fit added.

* fix: additional asarray added in resplot.

Co-authored-by: Simon Kuberski
---
 pyerrors/fits.py   | 655 ++++++++++++++++++++-------------------
 tests/fits_test.py | 256 ++++++++++++++++++
 tests/obs_test.py  |   2 +-
 3 files changed, 549 insertions(+), 364 deletions(-)

diff --git a/pyerrors/fits.py b/pyerrors/fits.py
index 0ddfd54a..da8c8ff6 100644
--- a/pyerrors/fits.py
+++ b/pyerrors/fits.py
@@ -125,8 +125,9 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
         It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
         will not work.
-    priors : list, optional
-        priors has to be a list with an entry for every parameter in the fit. The entries can either be
+    priors : dict or list, optional
+        priors can either be a dictionary with integer keys and the corresponding priors as values or
+        a list with an entry for every parameter in the fit. The entries can either be
         Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
         0.548(23), 500(40) or 0.5(0.4)
     silent : bool, optional
         If true all output to the console is omitted (default False).
@@ -166,10 +167,286 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
     output : Fit_result
         Parameters and information on the fitted result.
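+
+    Examples
+    --------
+    A minimal sketch of the two ways to pass priors (it assumes suitable
+    data ``x``, ``y`` and a two-parameter fit function ``func``):
+
+    >>> fit_res = least_squares(x, y, func, priors=["1.0(5)", "2.1(3)"])
+    >>> fit_res = least_squares(x, y, func, priors={1: "2.1(3)"})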
     '''
-    if priors is not None:
-        return _prior_fit(x, y, func, priors, silent=silent, **kwargs)
+    output = Fit_result()
+
+    if (type(x) == dict and type(y) == dict and type(func) == dict):
+        xd = {key: anp.asarray(x[key]) for key in x}
+        yd = y
+        funcd = func
+        output.fit_function = func
+    elif (type(x) == dict or type(y) == dict or type(func) == dict):
+        raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.")
     else:
-        return _combined_fit(x, y, func, silent=silent, **kwargs)
+        x = np.asarray(x)
+        xd = {"": x}
+        yd = {"": y}
+        funcd = {"": func}
+        output.fit_function = func
+
+    if kwargs.get('num_grad') is True:
+        jacobian = num_jacobian
+        hessian = num_hessian
+    else:
+        jacobian = auto_jacobian
+        hessian = auto_hessian
+
+    key_ls = sorted(list(xd.keys()))
+
+    if sorted(list(yd.keys())) != key_ls:
+        raise Exception('x and y dictionaries do not contain the same keys.')
+
+    if sorted(list(funcd.keys())) != key_ls:
+        raise Exception('x and func dictionaries do not contain the same keys.')
+
+    x_all = np.concatenate([np.array(xd[key]) for key in key_ls])
+    y_all = np.concatenate([np.array(yd[key]) for key in key_ls])
+
+    y_f = [o.value for o in y_all]
+    dy_f = [o.dvalue for o in y_all]
+
+    if len(x_all.shape) > 2:
+        raise Exception('Unknown format for x values')
+
+    if np.any(np.asarray(dy_f) <= 0.0):
+        raise Exception('No y errors available, run the gamma method first.')
+
+    # number of fit parameters
+    n_parms_ls = []
+    for key in key_ls:
+        if not callable(funcd[key]):
+            raise TypeError('func (key=' + key + ') is not a function.')
+        if np.asarray(xd[key]).shape[-1] != len(yd[key]):
+            raise Exception('x and y input (key=' + key + ') do not have the same length')
+        for i in range(100):
+            try:
+                funcd[key](np.arange(i), x_all.T[0])
+            except TypeError:
+                continue
+            except IndexError:
+                continue
+            else:
+                break
+        else:
+            raise RuntimeError("Fit function (key=" + key + ") is not valid.")
+        n_parms = i
+        n_parms_ls.append(n_parms)
+    n_parms = max(n_parms_ls)
+    if not silent:
+        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
+
+    if priors is not None:
+        if isinstance(priors, (list, np.ndarray)):
+            if n_parms != len(priors):
+                raise ValueError("'priors' does not have the correct length.")
+
+            loc_priors = []
+            for i_n, i_prior in enumerate(priors):
+                if isinstance(i_prior, Obs):
+                    loc_priors.append(i_prior)
+                elif isinstance(i_prior, str):
+                    loc_val, loc_dval = _extract_val_and_dval(i_prior)
+                    loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
+                else:
+                    raise TypeError("Prior entries need to be 'Obs' or 'str'.")
+
+            prior_mask = np.arange(len(priors))
+            output.priors = loc_priors
+
+        elif isinstance(priors, dict):
+            loc_priors = []
+            prior_mask = []
+            output.priors = {}
+            for pos, prior in priors.items():
+                if isinstance(pos, int):
+                    prior_mask.append(pos)
+                else:
+                    raise TypeError("Prior position needs to be an integer.")
+                if isinstance(prior, Obs):
+                    loc_priors.append(prior)
+                elif isinstance(prior, str):
+                    loc_val, loc_dval = _extract_val_and_dval(prior)
+                    loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(pos) + f"_{np.random.randint(2147483647):010d}"))
+                else:
+                    raise TypeError("Prior entries need to be 'Obs' or 'str'.")
+                output.priors[pos] = loc_priors[-1]
+            if max(prior_mask) >= n_parms:
+                raise ValueError("Prior position out of range.")
+        else:
+            raise TypeError("Unknown type for `priors`.")
+
+        p_f = [o.value for o in loc_priors]
+        dp_f = [o.dvalue for o in loc_priors]
+        if np.any(np.asarray(dp_f) <= 0.0):
+            raise Exception('No prior errors available, run the gamma method first.')
+    else:
+        p_f = dp_f = np.array([])
+        prior_mask = []
+        loc_priors = []
+
+    if 'initial_guess' in kwargs:
+        x0 = kwargs.get('initial_guess')
+        if len(x0) != n_parms:
+            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
+    else:
+        x0 = [0.1] * n_parms
+
+    if priors is None:
+        def general_chisqfunc_uncorr(p, ivars, pr):
+            model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
+            return (ivars - model) / dy_f
+    else:
+        def general_chisqfunc_uncorr(p, ivars, pr):
+            model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
+            return anp.concatenate(((ivars - model) / dy_f, (p[prior_mask] - pr) / dp_f))
+
+    def chisqfunc_uncorr(p):
+        return anp.sum(general_chisqfunc_uncorr(p, y_f, p_f) ** 2)
+
+    if kwargs.get('correlated_fit') is True:
+        corr = covariance(y_all, correlation=True, **kwargs)
+        covdiag = np.diag(1 / np.asarray(dy_f))
+        condn = np.linalg.cond(corr)
+        if condn > 0.1 / np.finfo(float).eps:
+            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
+        if condn > 1e13:
+            warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
+        chol = np.linalg.cholesky(corr)
+        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
+
+        def general_chisqfunc(p, ivars, pr):
+            model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
+            return anp.concatenate((anp.dot(chol_inv, (ivars - model)), (p[prior_mask] - pr) / dp_f))
+
+        def chisqfunc(p):
+            return anp.sum(general_chisqfunc(p, y_f, p_f) ** 2)
+    else:
+        general_chisqfunc = general_chisqfunc_uncorr
+        chisqfunc = chisqfunc_uncorr
+
+    output.method = kwargs.get('method', 'Levenberg-Marquardt')
+    if not silent:
+        print('Method:', output.method)
+
+    if output.method != 'Levenberg-Marquardt':
+        if output.method == 'migrad':
+            tolerance = 1e-4  # default value of 1e-1 set by iminuit can be problematic
+            if 'tol' in kwargs:
+                tolerance = kwargs.get('tol')
+            fit_result = iminuit.minimize(chisqfunc_uncorr, x0, tol=tolerance)  # Stopping criterion 0.002 * tol * errordef
+            if kwargs.get('correlated_fit') is True:
+                fit_result = iminuit.minimize(chisqfunc, fit_result.x, tol=tolerance)
+            output.iterations = fit_result.nfev
+        else:
+            tolerance = 1e-12
+            if 'tol' in kwargs:
+                tolerance = kwargs.get('tol')
+            fit_result = scipy.optimize.minimize(chisqfunc_uncorr, x0, method=kwargs.get('method'), tol=tolerance)
+            if kwargs.get('correlated_fit') is True:
+                fit_result = scipy.optimize.minimize(chisqfunc, fit_result.x, method=kwargs.get('method'), tol=tolerance)
+            output.iterations = fit_result.nit
+
+        chisquare = fit_result.fun
+
+    else:
+        if 'tol' in kwargs:
+            print('tol cannot be set for Levenberg-Marquardt')
+
+        def chisqfunc_residuals_uncorr(p):
+            return general_chisqfunc_uncorr(p, y_f, p_f)
+
+        fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
+        if kwargs.get('correlated_fit') is True:
+
+            def chisqfunc_residuals(p):
+                return general_chisqfunc(p, y_f, p_f)
+
+            fit_result = scipy.optimize.least_squares(chisqfunc_residuals, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
+
+        chisquare = np.sum(fit_result.fun ** 2)
+        assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
+
+        output.iterations = fit_result.nfev
+
+    if not fit_result.success:
+        raise Exception('The minimization procedure did not converge.')
+
+    if x_all.shape[-1] - n_parms > 0:
+        output.chisquare = chisquare
+        output.dof = x_all.shape[-1] - n_parms + len(loc_priors)
+        output.chisquare_by_dof = output.chisquare / output.dof
+        output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
+    else:
+        output.chisquare_by_dof = float('nan')
+
+    output.message = fit_result.message
+    if not silent:
+        print(fit_result.message)
+        print('chisquare/d.o.f.:', output.chisquare_by_dof)
+        print('fit parameters', fit_result.x)
+
+    def prepare_hat_matrix():
+        hat_vector = []
+        for key in key_ls:
+            if (len(xd[key]) != 0):
+                hat_vector.append(jacobian(funcd[key])(fit_result.x, xd[key]))
+        hat_vector = [item for sublist in hat_vector for item in sublist]
+        return hat_vector
+
+    if kwargs.get('expected_chisquare') is True:
+        if kwargs.get('correlated_fit') is not True:
+            W = np.diag(1 / np.asarray(dy_f))
+            cov = covariance(y_all)
+            hat_vector = prepare_hat_matrix()
+            A = W @ hat_vector
+            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
+            expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W)
+            output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare
+            if not silent:
+                print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare)
+
+    fitp = fit_result.x
+    if np.any(np.asarray(dy_f) <= 0.0):
+        raise Exception('No y errors available, run the gamma method first.')
+
+    try:
+        hess = hessian(chisqfunc)(fitp)
+    except TypeError:
+        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
+
+    len_y = len(y_f)
+
+    def chisqfunc_compact(d):
+        return anp.sum(general_chisqfunc(d[:n_parms], d[n_parms: n_parms + len_y], d[n_parms + len_y:]) ** 2)
+
+    jac_jac_y = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f, p_f)))
+
+    # Compute hess^{-1} @ jac_jac_y[:n_parms, n_parms:] using LAPACK dgesv
+    try:
+        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms, n_parms:])
+    except np.linalg.LinAlgError:
+        raise Exception("Cannot invert hessian matrix.")
+
+    result = []
+    for i in range(n_parms):
+        result.append(derived_observable(lambda x_all, **kwargs: (x_all[0] + np.finfo(np.float64).eps) / (y_all[0].value + np.finfo(np.float64).eps) * fitp[i], list(y_all) + loc_priors, man_grad=list(deriv_y[i])))
+
+    output.fit_parameters = result
+
+    # Hotelling t-squared p-value for correlated fits.
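+    # The statistic chisquare * (n_cov - dof) / (dof * (n_cov - 1)), with
+    # n_cov the smallest number of configurations entering the covariance
+    # estimate, follows an F(dof, n_cov - dof) distribution for Gaussian
+    # data, which is what the p-value below is based on.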
+    if kwargs.get('correlated_fit') is True:
+        n_cov = np.min(np.vectorize(lambda x_all: x_all.N)(y_all))
+        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
+                                                  output.dof, n_cov - output.dof)
+
+    if kwargs.get('resplot') is True:
+        for key in key_ls:
+            residual_plot(xd[key], yd[key], funcd[key], result, title=key)
+
+    if kwargs.get('qqplot') is True:
+        for key in key_ls:
+            qqplot(xd[key], yd[key], funcd[key], result, title=key)
+
+    return output
 
 
 def total_least_squares(x, y, func, silent=False, **kwargs):
@@ -376,363 +653,6 @@ def total_least_squares(x, y, func, silent=False, **kwargs):
     return output
 
 
-def _prior_fit(x, y, func, priors, silent=False, **kwargs):
-    output = Fit_result()
-
-    output.fit_function = func
-
-    x = np.asarray(x)
-
-    if kwargs.get('num_grad') is True:
-        hessian = num_hessian
-    else:
-        hessian = auto_hessian
-
-    if not callable(func):
-        raise TypeError('func has to be a function.')
-
-    for i in range(100):
-        try:
-            func(np.arange(i), 0)
-        except TypeError:
-            continue
-        except IndexError:
-            continue
-        else:
-            break
-    else:
-        raise RuntimeError("Fit function is not valid.")
-
-    n_parms = i
-
-    if n_parms != len(priors):
-        raise Exception('Priors does not have the correct length.')
-
-    def extract_val_and_dval(string):
-        split_string = string.split('(')
-        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
-            factor = 10 ** -len(split_string[0].partition('.')[2])
-        else:
-            factor = 1
-        return float(split_string[0]), float(split_string[1][:-1]) * factor
-
-    loc_priors = []
-    for i_n, i_prior in enumerate(priors):
-        if isinstance(i_prior, Obs):
-            loc_priors.append(i_prior)
-        else:
-            loc_val, loc_dval = extract_val_and_dval(i_prior)
-            loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
-
-    output.priors = loc_priors
-
-    if not silent:
-        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
-
-    y_f = [o.value for o in y]
-    dy_f = [o.dvalue for o in y]
-
-    if np.any(np.asarray(dy_f) <= 0.0):
-        raise Exception('No y errors available, run the gamma method first.')
-
-    p_f = [o.value for o in loc_priors]
-    dp_f = [o.dvalue for o in loc_priors]
-
-    if np.any(np.asarray(dp_f) <= 0.0):
-        raise Exception('No prior errors available, run the gamma method first.')
-
-    if 'initial_guess' in kwargs:
-        x0 = kwargs.get('initial_guess')
-        if len(x0) != n_parms:
-            raise Exception('Initial guess does not have the correct length.')
-    else:
-        x0 = p_f
-
-    def chisqfunc(p):
-        model = func(p, x)
-        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
-        return chisq
-
-    if not silent:
-        print('Method: migrad')
-
-    m = iminuit.Minuit(chisqfunc, x0)
-    m.errordef = 1
-    m.print_level = 0
-    if 'tol' in kwargs:
-        m.tol = kwargs.get('tol')
-    else:
-        m.tol = 1e-4
-    m.migrad()
-    params = np.asarray(m.values)
-
-    output.chisquare_by_dof = m.fval / len(x)
-
-    output.method = 'migrad'
-
-    if not silent:
-        print('chisquare/d.o.f.:', output.chisquare_by_dof)
-
-    if not m.fmin.is_valid:
-        raise Exception('The minimization procedure did not converge.')
-
-    hess = hessian(chisqfunc)(params)
-    hess_inv = np.linalg.pinv(hess)
-
-    def chisqfunc_compact(d):
-        model = func(d[:n_parms], x)
-        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
-        return chisq
-
-    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
-
-    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
-
-    result = []
-    for i in range(n_parms):
-        result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
-
-    output.fit_parameters = result
-    output.chisquare = chisqfunc(np.asarray(params))
-
-    if kwargs.get('resplot') is True:
-        residual_plot(x, y, func, result)
-
-    if kwargs.get('qqplot') is True:
-        qqplot(x, y, func, result)
-
-    return output
-
-
-def _combined_fit(x, y, func, silent=False, **kwargs):
-
-    output = Fit_result()
-
-    if (type(x) == dict and type(y) == dict and type(func) == dict):
-        xd = x
-        yd = y
-        funcd = func
-        output.fit_function = func
-    elif (type(x) == dict or type(y) == dict or type(func) == dict):
-        raise TypeError("All arguments have to be dictionaries in order to perform a combined fit.")
-    else:
-        x = np.asarray(x)
-        xd = {"": x}
-        yd = {"": y}
-        funcd = {"": func}
-        output.fit_function = func
-
-    if kwargs.get('num_grad') is True:
-        jacobian = num_jacobian
-        hessian = num_hessian
-    else:
-        jacobian = auto_jacobian
-        hessian = auto_hessian
-
-    key_ls = sorted(list(xd.keys()))
-
-    if sorted(list(yd.keys())) != key_ls:
-        raise Exception('x and y dictionaries do not contain the same keys.')
-
-    if sorted(list(funcd.keys())) != key_ls:
-        raise Exception('x and func dictionaries do not contain the same keys.')
-
-    x_all = np.concatenate([np.array(xd[key]) for key in key_ls])
-    y_all = np.concatenate([np.array(yd[key]) for key in key_ls])
-
-    y_f = [o.value for o in y_all]
-    dy_f = [o.dvalue for o in y_all]
-
-    if len(x_all.shape) > 2:
-        raise Exception('Unknown format for x values')
-
-    if np.any(np.asarray(dy_f) <= 0.0):
-        raise Exception('No y errors available, run the gamma method first.')
-
-    # number of fit parameters
-    n_parms_ls = []
-    for key in key_ls:
-        if not callable(funcd[key]):
-            raise TypeError('func (key=' + key + ') is not a function.')
-        if np.asarray(xd[key]).shape[-1] != len(yd[key]):
-            raise Exception('x and y input (key=' + key + ') do not have the same length')
-        for i in range(100):
-            try:
-                funcd[key](np.arange(i), x_all.T[0])
-            except TypeError:
-                continue
-            except IndexError:
-                continue
-            else:
-                break
-        else:
-            raise RuntimeError("Fit function (key=" + key + ") is not valid.")
-        n_parms = i
-        n_parms_ls.append(n_parms)
-    n_parms = max(n_parms_ls)
-    if not silent:
-        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
-
-    if 'initial_guess' in kwargs:
-        x0 = kwargs.get('initial_guess')
-        if len(x0) != n_parms:
-            raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms))
-    else:
-        x0 = [0.1] * n_parms
-
-    def general_chisqfunc_uncorr(p, ivars):
-        model = anp.concatenate([anp.array(funcd[key](p, anp.asarray(xd[key]))).reshape(-1) for key in key_ls])
-        return ((ivars - model) / dy_f)
-
-    def chisqfunc_uncorr(p):
-        return anp.sum(general_chisqfunc_uncorr(p, y_f) ** 2)
-
-    if kwargs.get('correlated_fit') is True:
-        corr = covariance(y_all, correlation=True, **kwargs)
-        covdiag = np.diag(1 / np.asarray(dy_f))
-        condn = np.linalg.cond(corr)
-        if condn > 0.1 / np.finfo(float).eps:
-            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
-        if condn > 1e13:
-            warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
-        chol = np.linalg.cholesky(corr)
-        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
-
-        def general_chisqfunc(p, ivars):
-            model = anp.concatenate([anp.array(funcd[key](p, anp.asarray(xd[key]))).reshape(-1) for key in key_ls])
-            return anp.dot(chol_inv, (ivars - model))
-
-        def chisqfunc(p):
-            return anp.sum(general_chisqfunc(p, y_f) ** 2)
-    else:
-        general_chisqfunc = general_chisqfunc_uncorr
-        chisqfunc = chisqfunc_uncorr
-
-    output.method = kwargs.get('method', 'Levenberg-Marquardt')
-    if not silent:
-        print('Method:', output.method)
-
-    if output.method != 'Levenberg-Marquardt':
-        if output.method == 'migrad':
-            tolerance = 1e-4  # default value of 1e-1 set by iminuit can be problematic
-            if 'tol' in kwargs:
-                tolerance = kwargs.get('tol')
-            fit_result = iminuit.minimize(chisqfunc_uncorr, x0, tol=tolerance)  # Stopping criterion 0.002 * tol * errordef
-            if kwargs.get('correlated_fit') is True:
-                fit_result = iminuit.minimize(chisqfunc, fit_result.x, tol=tolerance)
-            output.iterations = fit_result.nfev
-        else:
-            tolerance = 1e-12
-            if 'tol' in kwargs:
-                tolerance = kwargs.get('tol')
-            fit_result = scipy.optimize.minimize(chisqfunc_uncorr, x0, method=kwargs.get('method'), tol=tolerance)
-            if kwargs.get('correlated_fit') is True:
-                fit_result = scipy.optimize.minimize(chisqfunc, fit_result.x, method=kwargs.get('method'), tol=tolerance)
-            output.iterations = fit_result.nit
-
-        chisquare = fit_result.fun
-
-    else:
-        if 'tol' in kwargs:
-            print('tol cannot be set for Levenberg-Marquardt')
-
-        def chisqfunc_residuals_uncorr(p):
-            return general_chisqfunc_uncorr(p, y_f)
-
-        fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
-        if kwargs.get('correlated_fit') is True:
-
-            def chisqfunc_residuals(p):
-                return general_chisqfunc(p, y_f)
-
-            fit_result = scipy.optimize.least_squares(chisqfunc_residuals, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
-
-        chisquare = np.sum(fit_result.fun ** 2)
-        assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
-
-        output.iterations = fit_result.nfev
-
-    if not fit_result.success:
-        raise Exception('The minimization procedure did not converge.')
-
-    if x_all.shape[-1] - n_parms > 0:
-        output.chisquare = chisquare
-        output.dof = x_all.shape[-1] - n_parms
-        output.chisquare_by_dof = output.chisquare / output.dof
-        output.p_value = 1 - scipy.stats.chi2.cdf(output.chisquare, output.dof)
-    else:
-        output.chisquare_by_dof = float('nan')
-
-    output.message = fit_result.message
-    if not silent:
-        print(fit_result.message)
-        print('chisquare/d.o.f.:', output.chisquare_by_dof)
-        print('fit parameters', fit_result.x)
-
-    def prepare_hat_matrix():
-        hat_vector = []
-        for key in key_ls:
-            x_array = np.asarray(xd[key])
-            if (len(x_array) != 0):
-                hat_vector.append(jacobian(funcd[key])(fit_result.x, x_array))
-        hat_vector = [item for sublist in hat_vector for item in sublist]
-        return hat_vector
-
-    if kwargs.get('expected_chisquare') is True:
-        if kwargs.get('correlated_fit') is not True:
-            W = np.diag(1 / np.asarray(dy_f))
-            cov = covariance(y_all)
-            hat_vector = prepare_hat_matrix()
-            A = W @ hat_vector
-            P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
-            expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W)
-            output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare
-            if not silent:
-                print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare)
-
-    fitp = fit_result.x
-    if np.any(np.asarray(dy_f) <= 0.0):
-        raise Exception('No y errors available, run the gamma method first.')
-
-    try:
-        hess = hessian(chisqfunc)(fitp)
-    except TypeError:
-        raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
-
-    def chisqfunc_compact(d):
-        return anp.sum(general_chisqfunc(d[:n_parms], d[n_parms:]) ** 2)
-
-    jac_jac_y = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
-
-    # Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
-    try:
-        deriv_y = -scipy.linalg.solve(hess, jac_jac_y[:n_parms, n_parms:])
-    except np.linalg.LinAlgError:
-        raise Exception("Cannot invert hessian matrix.")
-
-    result = []
-    for i in range(n_parms):
-        result.append(derived_observable(lambda x_all, **kwargs: (x_all[0] + np.finfo(np.float64).eps) / (y_all[0].value + np.finfo(np.float64).eps) * fitp[i], list(y_all), man_grad=list(deriv_y[i])))
-
-    output.fit_parameters = result
-
-    # Hotelling t-squared p-value for correlated fits.
-    if kwargs.get('correlated_fit') is True:
-        n_cov = np.min(np.vectorize(lambda x_all: x_all.N)(y_all))
-        output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
-                                                  output.dof, n_cov - output.dof)
-
-    if kwargs.get('resplot') is True:
-        for key in key_ls:
-            residual_plot(xd[key], yd[key], funcd[key], result, title=key)
-
-    if kwargs.get('qqplot') is True:
-        for key in key_ls:
-            qqplot(xd[key], yd[key], funcd[key], result, title=key)
-
-    return output
-
-
 def fit_lin(x, y, **kwargs):
     """Performs a linear fit to y = n + m * x and returns two Obs n, m.
 
@@ -818,7 +738,7 @@ def residual_plot(x, y, func, fit_res, title=""):
     ax0.set_xticklabels([])
     ax0.legend(title=title)
 
-    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
+    residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], np.asarray(x))) / np.asarray([o.dvalue for o in y])
     ax1 = plt.subplot(gs[1])
     ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
     ax1.tick_params(direction='out')
@@ -897,3 +817,12 @@ def ks_test(objects=None):
         plt.draw()
 
     print(scipy.stats.kstest(p_values, 'uniform'))
+
+
+def _extract_val_and_dval(string):
+    split_string = string.split('(')
+    if '.' in split_string[0] and '.' not in split_string[1][:-1]:
+        factor = 10 ** -len(split_string[0].partition('.')[2])
+    else:
+        factor = 1
+    return float(split_string[0]), float(split_string[1][:-1]) * factor
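+
+
+# Sketch of the shorthand notation parsed by _extract_val_and_dval, for
+# illustration (these example values are not part of the original patch):
+#   "0.548(23)" -> (0.548, 0.023)   # error scaled to the last digits
+#   "500(40)"   -> (500.0, 40.0)    # no decimal point, error taken as is
+#   "0.5(0.4)"  -> (0.5, 0.4)       # explicit error with decimal point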
diff --git a/tests/fits_test.py b/tests/fits_test.py
index 206f5f40..1af0e56e 100644
--- a/tests/fits_test.py
+++ b/tests/fits_test.py
@@ -6,6 +6,9 @@ import scipy.optimize
 from scipy.odr import ODR, Model, RealData
 from scipy.linalg import cholesky
 from scipy.stats import norm
+import iminuit
+from autograd import jacobian
+from autograd import hessian
 import pyerrors as pe
 import pytest
 
@@ -410,6 +413,19 @@ def test_prior_fit():
     fitp = pe.fits.least_squares([0, 1], y, f, priors=y, resplot=True, qqplot=True)
 
 
+def test_vs_old_prior_implementation():
+    x = np.arange(1, 5)
+    y = [pe.pseudo_Obs(2 * i + 1.1 + np.random.normal(0.0, 0.1), .1, 't') for i in x]
+    [o.gm() for o in y]
+    def fitf(a, x):
+        return a[0] * x + a[1]
+    priors = [pe.cov_Obs(1.10, 0.01 ** 2, "p0"), pe.cov_Obs(1.1, 0.3 ** 2, "p1")]
+    pr = pe.fits.least_squares(x, y, fitf, priors=priors, method="migrad")
+    fr = old_prior_fit(x, y, fitf, priors=priors)
+    assert pr[0] == fr[0]
+    assert pr[1] == fr[1]
+
+
 def test_correlated_fit_covobs():
     x1 = pe.cov_Obs(1.01, 0.01 ** 2, 'test1')
     x2 = pe.cov_Obs(2.01, 0.01 ** 2, 'test2')
@@ -924,6 +940,123 @@ def test_x_multidim_fit():
     pe.fits.least_squares(x, y, fitf)
 
 
+def test_priors_dict_vs_list():
+    x = np.arange(1, 5)
+    y = [pe.pseudo_Obs(2 * i + 1.1 + np.random.normal(0.0, 0.01), .01, 't') for i in x]
+    [o.gm() for o in y]
+    def fitf(a, x):
+        return a[0] * x + a[1]
+    priors = [pe.cov_Obs(1.0, 0.0001 ** 2, "p0"), pe.cov_Obs(1.1, 0.8 ** 2, "p1")]
+    pr1 = pe.fits.least_squares(x, y, fitf, priors=priors)
+    prd = {0: priors[0],
+           1: priors[1]}
+
+    pr2 = pe.fits.least_squares(x, y, fitf, priors=prd)
+
+    prd = {1: priors[1],
+           0: priors[0]}
+
+    pr3 = pe.fits.least_squares(x, y, fitf, priors=prd)
+    assert (pr1[0] - pr2[0]).is_zero(1e-6)
+    assert (pr1[1] - pr2[1]).is_zero(1e-6)
+    assert (pr1[0] - pr3[0]).is_zero(1e-6)
+    assert (pr1[1] - pr3[1]).is_zero(1e-6)
+
+
+def test_not_all_priors_set():
+    x = np.arange(1, 5)
+    y = [pe.pseudo_Obs(2 * i + 1.1 + np.random.normal(0.0, 0.01), .01, 't') for i in x]
+    [o.gm() for o in y]
+    def fitf(a, x):
+        return a[0] * x + a[1] + a[2] * x ** 2
+    priors = [pe.cov_Obs(2.0, 0.1 ** 2, "p0"), pe.cov_Obs(2, 0.8 ** 2, "p2")]
+    prd = {0: priors[0],
+           2: priors[1]}
+
+    pr1 = pe.fits.least_squares(x, y, fitf, priors=prd)
+
+    prd = {2: priors[1],
+           0: priors[0]}
+
+    pr2 = pe.fits.least_squares(x, y, fitf, priors=prd)
+    assert (pr1[0] - pr2[0]).is_zero(1e-6)
+    assert (pr1[1] - pr2[1]).is_zero(1e-6)
+    assert (pr1[2] - pr2[2]).is_zero(1e-6)
+
+
+def test_force_fit_on_prior():
+    x = np.arange(1, 10)
+    y = [pe.pseudo_Obs(2 + np.random.normal(0.0, 0.1), .1, 't') for i in x]
+    [o.gm() for o in y]
+    def fitf(a, x):
+        return a[0]
+
+    prd = {0: pe.cov_Obs(0.0, 0.0000001 ** 2, "prior0")}
+
+    pr = pe.fits.least_squares(x, y, fitf, priors=prd)
+    pr.gm()
+
+    diff = prd[0] - pr[0]
+    diff.gm()
+    assert diff.is_zero_within_error(5)
+
+
+def test_constrained_and_prior_fit():
+
+    for my_constant in [pe.pseudo_Obs(5.0, 0.00000001, "test"),
+                        pe.pseudo_Obs(6.5, 0.00000001, "test"),
+                        pe.pseudo_Obs(6.5, 0.00000001, "another_ensemble")]:
+        dim = 10
+        x = np.arange(dim)
+        y = -0.06 * x + 5 + np.random.normal(0.0, 0.3, dim)
+        yerr = [0.3] * dim
+
+        oy = []
+        for i, item in enumerate(x):
+            oy.append(pe.pseudo_Obs(y[i], yerr[i], 'test'))
+
+        def func(a, x):
+            y = a[0] * x + a[1]
+            return y
+
+        # Fit with constrained parameter
+        out = pe.least_squares(x, oy, func, priors={1: my_constant}, silent=True)
+        out.gm()
+
+        def alt_func(a, x):
+            y = a[0] * x
+            return y
+
+        alt_y = np.array(oy) - my_constant
+        [o.gm() for o in alt_y]
+
+        # Fit with the constant subtracted from the data
+        alt_out = pe.least_squares(x, alt_y, alt_func, silent=True)
+        alt_out.gm()
+
+        assert np.isclose(out[0].value, alt_out[0].value, atol=1e-5, rtol=1e-6)
+        assert np.isclose(out[0].dvalue, alt_out[0].dvalue, atol=1e-5, rtol=1e-6)
+        assert np.isclose(out.chisquare_by_dof, alt_out.chisquare_by_dof, atol=1e-5, rtol=1e-6)
+
+
+def test_resplot_lists_in_dict():
+    xd = {
+        'a': [1, 2, 3],
+        'b': [1, 2, 3],
+    }
+    yd = {
+        'a': [pe.pseudo_Obs(i, .1*i, 't') for i in range(1, 4)],
+        'b': [pe.pseudo_Obs(2*i**2, .1*i**2, 't') for i in range(1, 4)]
+    }
+    [[o.gamma_method() for o in y] for y in yd.values()]
+    fd = {
+        'a': lambda a, x: a[0] + a[1] * x,
+        'b': lambda a, x: a[0] + a[2] * x**2,
+    }
+
+    fitp = pe.fits.least_squares(xd, yd, fd, resplot=True)
+
+
 def fit_general(x, y, func, silent=False, **kwargs):
     """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
 
@@ -1021,3 +1154,126 @@ def fit_general(x, y, func, silent=False, **kwargs):
     for n in range(n_parms):
         res.append(pe.derived_observable(do_the_fit, obs, function=func, xerr=xerr, yerr=yerr, x_constants=x_constants, num_grad=True, n=n, **kwargs))
     return res
+
+
+def old_prior_fit(x, y, func, priors, silent=False, **kwargs):
+    output = pe.fits.Fit_result()
+
+    output.fit_function = func
+
+    x = np.asarray(x)
+
+    if not callable(func):
+        raise TypeError('func has to be a function.')
+
+    for i in range(100):
+        try:
+            func(np.arange(i), 0)
+        except TypeError:
+            continue
+        except IndexError:
+            continue
+        else:
+            break
+    else:
+        raise RuntimeError("Fit function is not valid.")
+
+    n_parms = i
+
+    if n_parms != len(priors):
+        raise Exception('Priors does not have the correct length.')
+
+    def extract_val_and_dval(string):
+        split_string = string.split('(')
+        if '.' in split_string[0] and '.' not in split_string[1][:-1]:
+            factor = 10 ** -len(split_string[0].partition('.')[2])
+        else:
+            factor = 1
+        return float(split_string[0]), float(split_string[1][:-1]) * factor
+
+    loc_priors = []
+    for i_n, i_prior in enumerate(priors):
+        if isinstance(i_prior, pe.Obs):
+            loc_priors.append(i_prior)
+        else:
+            loc_val, loc_dval = extract_val_and_dval(i_prior)
+            loc_priors.append(pe.cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}"))
+
+    output.priors = loc_priors
+
+    if not silent:
+        print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1))
+
+    y_f = [o.value for o in y]
+    dy_f = [o.dvalue for o in y]
+
+    if np.any(np.asarray(dy_f) <= 0.0):
+        raise Exception('No y errors available, run the gamma method first.')
+
+    p_f = [o.value for o in loc_priors]
+    dp_f = [o.dvalue for o in loc_priors]
+
+    if np.any(np.asarray(dp_f) <= 0.0):
+        raise Exception('No prior errors available, run the gamma method first.')
+
+    if 'initial_guess' in kwargs:
+        x0 = kwargs.get('initial_guess')
+        if len(x0) != n_parms:
+            raise Exception('Initial guess does not have the correct length.')
+    else:
+        x0 = p_f
+
+    def chisqfunc(p):
+        model = func(p, x)
+        chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
+        return chisq
+
+    if not silent:
+        print('Method: migrad')
+
+    m = iminuit.Minuit(chisqfunc, x0)
+    m.errordef = 1
+    m.print_level = 0
+    if 'tol' in kwargs:
+        m.tol = kwargs.get('tol')
+    else:
+        m.tol = 1e-4
+    m.migrad()
+    params = np.asarray(m.values)
+
+    output.chisquare_by_dof = m.fval / len(x)
+
+    output.method = 'migrad'
+
+    if not silent:
+        print('chisquare/d.o.f.:', output.chisquare_by_dof)
+
+    if not m.fmin.is_valid:
+        raise Exception('The minimization procedure did not converge.')
+
+    hess = hessian(chisqfunc)(params)
+    hess_inv = np.linalg.pinv(hess)
+
+    def chisqfunc_compact(d):
+        model = func(d[:n_parms], x)
+        chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
+        return chisq
+
+    jac_jac = hessian(chisqfunc_compact)(np.concatenate((params, y_f, p_f)))
+
+    deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
+
+    result = []
+    for i in range(n_parms):
+        result.append(pe.derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), man_grad=list(deriv[i])))
+
+    output.fit_parameters = result
+    output.chisquare = chisqfunc(np.asarray(params))
+
+    if kwargs.get('resplot') is True:
+        residual_plot(x, y, func, result)
+
+    if kwargs.get('qqplot') is True:
+        qqplot(x, y, func, result)
+
+    return output
diff --git a/tests/obs_test.py b/tests/obs_test.py
index 3eefdfc3..104d3978 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -1085,7 +1085,7 @@ def test_non_overlapping_missing_cnfgs():
     average = (even + odd) / 2
     average.gm(S=0)
     assert np.isclose(full.value, average.value)
-    assert np.isclose(full.dvalue, average.dvalue, rtol=0.01)
+    assert np.isclose(full.dvalue, average.dvalue, rtol=0.02)
 
 
 def test_non_overlapping_operations():