diff --git a/pyerrors/linalg.py b/pyerrors/linalg.py
index 0394af8e..d47e5dc0 100644
--- a/pyerrors/linalg.py
+++ b/pyerrors/linalg.py
@@ -2,8 +2,9 @@
 # coding: utf-8
 
 import numpy as np
+from autograd import jacobian
 import autograd.numpy as anp  # Thinly-wrapped numpy
-from .pyerrors import derived_observable, CObs
+from .pyerrors import derived_observable, CObs, Obs
 
 
 # This code block is directly taken from the current master branch of autograd and remains
@@ -11,50 +12,135 @@ from .pyerrors import derived_observable, CObs
 from functools import partial
 from autograd.extend import defvjp
 
-_dot = partial(anp.einsum, '...ij,...jk->...ik')
+
+def derived_array(func, data, **kwargs):
+    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
+
+    Parameters
+    ----------
+    func -- arbitrary function of the form func(data, **kwargs). For the
+            automatic differentiation to work, all numpy functions have to have
+            the autograd wrapper (use 'import autograd.numpy as anp').
+    data -- list of Obs, e.g. [obs1, obs2, obs3].
+
+    Keyword arguments
+    -----------------
+    num_grad -- if True, numerical derivatives are used instead of autograd
+                (default False). To control the numerical differentiation the
+                kwargs of numdifftools.step_generators.MaxStepGenerator
+                can be used.
+    man_grad -- manually supply a list or an array which contains the jacobian
+                of func. Use cautiously, supplying the wrong derivative will
+                not be intercepted.
+
+    Notes
+    -----
+    For simple mathematical operations it can be practical to use anonymous
+    functions. For the ratio of two observables one can e.g. use
+
+    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
+    """
+
+    data = np.asarray(data)
+    raveled_data = data.ravel()
+
+    # Workaround for matrix operations containing non Obs data
+    for i_data in raveled_data:
+        if isinstance(i_data, Obs):
+            first_name = i_data.names[0]
+            first_shape = i_data.shape[first_name]
+            break
+
+    for i in range(len(raveled_data)):
+        if isinstance(raveled_data[i], (int, float)):
+            raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name])
+
+    n_obs = len(raveled_data)
+    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
+
+    new_shape = {}
+    for i_data in raveled_data:
+        for name in new_names:
+            tmp = i_data.shape.get(name)
+            if tmp is not None:
+                if new_shape.get(name) is None:
+                    new_shape[name] = tmp
+                else:
+                    if new_shape[name] != tmp:
+                        raise Exception('Shapes of ensemble', name, 'do not match.')
+    if data.ndim == 1:
+        values = np.array([o.value for o in data])
+    else:
+        values = np.vectorize(lambda x: x.value)(data)
+
+    new_values = func(values, **kwargs)
+
+    new_r_values = {}
+    for name in new_names:
+        tmp_values = np.zeros(n_obs)
+        for i, item in enumerate(raveled_data):
+            tmp = item.r_values.get(name)
+            if tmp is None:
+                tmp = item.value
+            tmp_values[i] = tmp
+        tmp_values = np.array(tmp_values).reshape(data.shape)
+        new_r_values[name] = func(tmp_values, **kwargs)
+
+    if 'man_grad' in kwargs:
+        deriv = np.asarray(kwargs.get('man_grad'))
+        if new_values.shape + data.shape != deriv.shape:
+            raise Exception('Manual derivative does not have correct shape.')
+    elif kwargs.get('num_grad') is True:
+        raise Exception('Multi mode currently not supported for numerical derivative')
+    else:
+        deriv = jacobian(func)(values, **kwargs)
+
+    final_result = np.zeros(new_values.shape, dtype=object)
+
+    d_extracted = {}
+    for name in new_names:
+        d_extracted[name] = []
+        for i_dat, dat in enumerate(data):
+            ens_length = dat.ravel()[0].shape[name]
+            d_extracted[name].append(np.array([o.deltas[name] for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
+
+    for i_val, new_val in np.ndenumerate(new_values):
+        new_deltas = {}
+        for name in new_names:
+            ens_length = d_extracted[name][0].shape[-1]
+            new_deltas[name] = np.zeros(ens_length)
+            for i_dat, dat in enumerate(d_extracted[name]):
+                new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
+
+        new_samples = []
+        new_means = []
+        for name in new_names:
+            new_samples.append(new_deltas[name])
+            new_means.append(new_r_values[name][i_val])
+
+        final_result[i_val] = Obs(new_samples, new_names, means=new_means)
+        final_result[i_val]._value = new_val
+
+    return final_result
 
 
-# batched diag
-def _diag(a):
-    return anp.eye(a.shape[-1]) * a
+def matmul(x1, x2):
+    if isinstance(x1[0, 0], CObs) or isinstance(x2[0, 0], CObs):
+        Lr, Li = np.vectorize(lambda x: (np.real(x), np.imag(x)))(x1)
+        Rr, Ri = np.vectorize(lambda x: (np.real(x), np.imag(x)))(x2)
+        Nr = derived_array(lambda x: x[0] @ x[2] - x[1] @ x[3], [Lr, Li, Rr, Ri])
+        Ni = derived_array(lambda x: x[0] @ x[3] + x[1] @ x[2], [Lr, Li, Rr, Ri])
+        return (1 + 0j) * Nr + 1j * Ni
+    else:
+        return derived_array(lambda x: x[0] @ x[1], [x1, x2])
 
 
-# batched diagonal, similar to matrix_diag in tensorflow
-def _matrix_diag(a):
-    reps = anp.array(a.shape)
-    reps[:-1] = 1
-    reps[-1] = a.shape[-1]
-    newshape = list(a.shape) + [a.shape[-1]]
-    return _diag(anp.tile(a, reps).reshape(newshape))
-
-# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
-# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
+def inv(x):
+    return mat_mat_op(anp.linalg.inv, x)
 
 
-def grad_eig(ans, x):
-    """Gradient of a general square (complex valued) matrix"""
-    e, u = ans  # eigenvalues as 1d array, eigenvectors in columns
-    n = e.shape[-1]
-
-    def vjp(g):
-        ge, gu = g
-        ge = _matrix_diag(ge)
-        f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
-        f -= _diag(f)
-        ut = anp.swapaxes(u, -1, -2)
-        r1 = f * _dot(ut, gu)
-        r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
-        r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
-        if not anp.iscomplexobj(x):
-            r = anp.real(r)
-            # the derivative is still complex for real input (imaginary delta is allowed), real output
-            # but the derivative should be real in real input case when imaginary delta is forbidden
-        return r
-    return vjp
-
-
-defvjp(anp.linalg.eig, grad_eig)
-# End of the code block from autograd.master
+def cholesky(x):
+    return mat_mat_op(anp.linalg.cholesky, x)
 
 
 def scalar_mat_op(op, obs, **kwargs):
@@ -107,7 +193,7 @@ def mat_mat_op(op, obs, **kwargs):
     else:
         if kwargs.get('num_grad') is True:
             return _num_diff_mat_mat_op(op, obs, **kwargs)
-        return derived_observable(lambda x, **kwargs: op(x), obs)
+        return derived_array(lambda x, **kwargs: op(x), [obs])[0]
 
 
 def eigh(obs, **kwargs):
@@ -375,3 +461,49 @@ def _num_diff_svd(obs, **kwargs):
             res_mat2.append(row)
 
     return (np.array(res_mat0) @ np.identity(mid_index), np.array(res_mat1) @ np.identity(mid_index), np.array(res_mat2) @ np.identity(shape[1]))
+
+
+_dot = partial(anp.einsum, '...ij,...jk->...ik')
+
+
+# batched diag
+def _diag(a):
+    return anp.eye(a.shape[-1]) * a
+
+
+# batched diagonal, similar to matrix_diag in tensorflow
+def _matrix_diag(a):
+    reps = anp.array(a.shape)
+    reps[:-1] = 1
+    reps[-1] = a.shape[-1]
+    newshape = list(a.shape) + [a.shape[-1]]
+    return _diag(anp.tile(a, reps).reshape(newshape))
+
+# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
+# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
+
+
+def grad_eig(ans, x):
+    """Gradient of a general square (complex valued) matrix"""
+    e, u = ans  # eigenvalues as 1d array, eigenvectors in columns
+    n = e.shape[-1]
+
+    def vjp(g):
+        ge, gu = g
+        ge = _matrix_diag(ge)
+        f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
+        f -= _diag(f)
+        ut = anp.swapaxes(u, -1, -2)
+        r1 = f * _dot(ut, gu)
+        r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
+        r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
+        if not anp.iscomplexobj(x):
+            r = anp.real(r)
+            # the derivative is still complex for real input (imaginary delta is allowed), real output
+            # but the derivative should be real in real input case when imaginary delta is forbidden
+        return r
+    return vjp
+
+
+defvjp(anp.linalg.eig, grad_eig)
+# End of the code block from autograd.master
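
Note (reviewer annotation, not part of the patch): derived_array extends derived_observable to matrix-valued functions, and matmul, inv and cholesky are thin wrappers on top of it. A minimal usage sketch for real Obs matrices, assuming the matrix size, sample length and ensemble name 'ens' purely for illustration:

import numpy as np
import pyerrors as pe

length = 500
mat = np.array([pe.Obs([np.random.rand(length)], ['ens'])
                for _ in range(4)]).reshape((2, 2))

# Error propagation through the whole contraction in one step ...
prod = pe.linalg.matmul(mat, mat)
# ... and the new shorthand for mat_mat_op(anp.linalg.inv, mat).
inverse = pe.linalg.inv(mat)
# matmul(mat, inverse) should reproduce the identity matrix within errors.
check = pe.linalg.matmul(mat, inverse)
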
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index af121912..b978b35b 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -1,4 +1,5 @@
-import autograd.numpy as np
+import numpy as np
+import autograd.numpy as anp
 import math
 import pyerrors as pe
 import pytest
@@ -6,6 +7,28 @@ import pytest
 np.random.seed(0)
 
 
+def test_matmul():
+    for dim in [4, 8]:
+        my_list = []
+        length = 1000 + np.random.randint(200)
+        for i in range(dim ** 2):
+            my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
+        my_array = np.array(my_list).reshape((dim, dim))
+        tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
+        for t, e in np.ndenumerate(tt):
+            assert e.is_zero(), t
+
+        my_list = []
+        length = 1000 + np.random.randint(200)
+        for i in range(dim ** 2):
+            my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
+                                   pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
+        my_array = np.array(my_list).reshape((dim, dim))
+        tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
+        for t, e in np.ndenumerate(tt):
+            assert e.is_zero(), t
+
+
 def test_matrix_inverse():
     content = []
     for t in range(9):
@@ -14,7 +37,7 @@ def test_matrix_inverse():
     content.append(1.0)  # Add 1.0 as a float
 
     matrix = np.diag(content)
-    inverse_matrix = pe.linalg.mat_mat_op(np.linalg.inv, matrix)
+    inverse_matrix = pe.linalg.inv(matrix)
     assert all([o.is_zero() for o in np.diag(matrix) * np.diag(inverse_matrix) - 1])
 
 
@@ -35,7 +58,7 @@ def test_complex_matrix_inverse():
             matrix[n, m] = entry.real.value + 1j * entry.imag.value
 
     inverse_matrix = np.linalg.inv(matrix)
-    inverse_obs_matrix = pe.linalg.mat_mat_op(np.linalg.inv, obs_matrix)
+    inverse_obs_matrix = pe.linalg.inv(obs_matrix)
     for (n, m), entry in np.ndenumerate(inverse_matrix):
         assert np.isclose(inverse_matrix[n, m].real, inverse_obs_matrix[n, m].real.value)
         assert np.isclose(inverse_matrix[n, m].imag, inverse_obs_matrix[n, m].imag.value)
@@ -53,7 +76,7 @@ def test_matrix_functions():
     matrix = np.array(matrix) @ np.identity(dim)
 
     # Check inverse of matrix
-    inv = pe.linalg.mat_mat_op(np.linalg.inv, matrix)
+    inv = pe.linalg.inv(matrix)
     check_inv = matrix @ inv
 
     for (i, j), entry in np.ndenumerate(check_inv):
@@ -66,7 +89,7 @@ def test_matrix_functions():
 
     # Check Cholesky decomposition
     sym = np.dot(matrix, matrix.T)
-    cholesky = pe.linalg.mat_mat_op(np.linalg.cholesky, sym)
+    cholesky = pe.linalg.cholesky(sym)
     check = cholesky @ cholesky.T
 
     for (i, j), entry in np.ndenumerate(check):
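
Note (reviewer annotation, not part of the patch): for complex matrices, matmul splits each factor into its real and imaginary Obs parts and combines four real products, as exercised by test_matmul above. A sketch of that path mirroring the test; dimension, sample length and ensemble name 'e1' are illustrative assumptions:

import numpy as np
import pyerrors as pe

dim, length = 3, 400

def rand_obs():
    return pe.Obs([np.random.rand(length)], ['e1'])

cmat = np.array([pe.CObs(rand_obs(), rand_obs())
                 for _ in range(dim ** 2)]).reshape((dim, dim))

# Same contraction twice: once with full error propagation, once entry-wise
# via operator overloading; every entry of the difference should be compatible with zero.
diff = pe.linalg.matmul(cmat, cmat) - cmat @ cmat
assert all(e.is_zero() for _, e in np.ndenumerate(diff))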