mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-05-14 19:43:41 +02:00
matrix valued operations speed up, inv and cholesky explicitly added
This commit is contained in:
parent 60d2e25ed7, commit c3f8b24a21
2 changed files with 200 additions and 45 deletions
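In short, pe.linalg now exposes matmul, inv and cholesky directly instead of routing every call through mat_mat_op by hand. A minimal usage sketch of the new entry points (the Obs construction below is illustrative and not part of this commit):

    import numpy as np
    import pyerrors as pe

    # illustrative 2x2 matrix of Obs built from made-up samples
    matrix = np.array([pe.Obs([np.random.rand(1000)], ['ens1']) for _ in range(4)]).reshape((2, 2))

    prod = pe.linalg.matmul(matrix, matrix)               # fast matrix product of Obs (or CObs) matrices
    inverse = pe.linalg.inv(matrix)                       # replaces the mat_mat_op(np.linalg.inv, ...) calls in the tests below
    chol = pe.linalg.cholesky(np.dot(matrix, matrix.T))   # replaces the mat_mat_op(np.linalg.cholesky, ...) call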
@@ -2,8 +2,9 @@
 # coding: utf-8

 import numpy as np
+from autograd import jacobian
 import autograd.numpy as anp  # Thinly-wrapped numpy
-from .pyerrors import derived_observable, CObs
+from .pyerrors import derived_observable, CObs, Obs


 # This code block is directly taken from the current master branch of autograd and remains
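The jacobian import added above is what makes array-valued outputs differentiable in one call: autograd.jacobian returns the derivative of every output element with respect to every input element as a single array. A tiny illustration (the function is made up):

    import autograd.numpy as anp
    from autograd import jacobian

    def square_all(x):
        return x ** 2

    j = jacobian(square_all)(anp.array([1.0, 2.0, 3.0]))
    assert j.shape == (3, 3)  # output shape + input shape
    assert anp.allclose(j, anp.diag(anp.array([2.0, 4.0, 6.0])))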
@@ -11,50 +12,135 @@ from .pyerrors import derived_observable, CObs
 from functools import partial
 from autograd.extend import defvjp

-_dot = partial(anp.einsum, '...ij,...jk->...ik')
+
+def derived_array(func, data, **kwargs):
+    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
+
+    Parameters
+    ----------
+    func -- arbitrary function of the form func(data, **kwargs). For the
+            automatic differentiation to work, all numpy functions have to have
+            the autograd wrapper (use 'import autograd.numpy as anp').
+    data -- list of Obs, e.g. [obs1, obs2, obs3].
+
+    Keyword arguments
+    -----------------
+    num_grad -- if True, numerical derivatives are used instead of autograd
+                (default False). To control the numerical differentiation the
+                kwargs of numdifftools.step_generators.MaxStepGenerator
+                can be used.
+    man_grad -- manually supply a list or an array which contains the jacobian
+                of func. Use cautiously, supplying the wrong derivative will
+                not be intercepted.
+
+    Notes
+    -----
+    For simple mathematical operations it can be practical to use anonymous
+    functions. For the ratio of two observables one can e.g. use
+
+    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
+    """
+
+    data = np.asarray(data)
+    raveled_data = data.ravel()
+
+    # Workaround for matrix operations containing non Obs data
+    for i_data in raveled_data:
+        if isinstance(i_data, Obs):
+            first_name = i_data.names[0]
+            first_shape = i_data.shape[first_name]
+            break
+
+    for i in range(len(raveled_data)):
+        if isinstance(raveled_data[i], (int, float)):
+            raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name])
+
+    n_obs = len(raveled_data)
+    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
+
+    new_shape = {}
+    for i_data in raveled_data:
+        for name in new_names:
+            tmp = i_data.shape.get(name)
+            if tmp is not None:
+                if new_shape.get(name) is None:
+                    new_shape[name] = tmp
+                else:
+                    if new_shape[name] != tmp:
+                        raise Exception('Shapes of ensemble', name, 'do not match.')
+    if data.ndim == 1:
+        values = np.array([o.value for o in data])
+    else:
+        values = np.vectorize(lambda x: x.value)(data)
+
+    new_values = func(values, **kwargs)
+
+    new_r_values = {}
+    for name in new_names:
+        tmp_values = np.zeros(n_obs)
+        for i, item in enumerate(raveled_data):
+            tmp = item.r_values.get(name)
+            if tmp is None:
+                tmp = item.value
+            tmp_values[i] = tmp
+        tmp_values = np.array(tmp_values).reshape(data.shape)
+        new_r_values[name] = func(tmp_values, **kwargs)
+
+    if 'man_grad' in kwargs:
+        deriv = np.asarray(kwargs.get('man_grad'))
+        if new_values.shape + data.shape != deriv.shape:
+            raise Exception('Manual derivative does not have correct shape.')
+    elif kwargs.get('num_grad') is True:
+        raise Exception('Multi mode currently not supported for numerical derivative')
+    else:
+        deriv = jacobian(func)(values, **kwargs)
+
+    final_result = np.zeros(new_values.shape, dtype=object)
+
+    d_extracted = {}
+    for name in new_names:
+        d_extracted[name] = []
+        for i_dat, dat in enumerate(data):
+            ens_length = dat.ravel()[0].shape[name]
+            d_extracted[name].append(np.array([o.deltas[name] for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
+
+    for i_val, new_val in np.ndenumerate(new_values):
+        new_deltas = {}
+        for name in new_names:
+            ens_length = d_extracted[name][0].shape[-1]
+            new_deltas[name] = np.zeros(ens_length)
+            for i_dat, dat in enumerate(d_extracted[name]):
+                new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
+
+        new_samples = []
+        new_means = []
+        for name in new_names:
+            new_samples.append(new_deltas[name])
+            new_means.append(new_r_values[name][i_val])
+
+        final_result[i_val] = Obs(new_samples, new_names, means=new_means)
+        final_result[i_val]._value = new_val
+
+    return final_result
+
+
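The heart of derived_array is the np.tensordot call above: for each output element it contracts the Jacobian block with the per-configuration fluctuations of the whole input array in one step, instead of assembling the result element by element from individual Obs. A self-contained sketch of that contraction with made-up shapes (dim, ens_length and the random arrays are illustrative):

    import numpy as np

    dim, ens_length = 3, 50
    deriv_block = np.random.rand(dim, dim)          # d(one output element) / d(input matrix)
    deltas = np.random.rand(dim, dim, ens_length)   # per-configuration fluctuations of the input matrix
    new_deltas = np.tensordot(deriv_block, deltas)  # default axes=2: sum over both matrix indices
    assert new_deltas.shape == (ens_length,)

The diff continues below with the removal of the vendored autograd helpers from this position and the new matmul, inv and cholesky wrappers.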
-
-
-# batched diag
-def _diag(a):
-    return anp.eye(a.shape[-1]) * a
-
-
-# batched diagonal, similar to matrix_diag in tensorflow
-def _matrix_diag(a):
-    reps = anp.array(a.shape)
-    reps[:-1] = 1
-    reps[-1] = a.shape[-1]
-    newshape = list(a.shape) + [a.shape[-1]]
-    return _diag(anp.tile(a, reps).reshape(newshape))
-
-
-# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
-# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
-def grad_eig(ans, x):
-    """Gradient of a general square (complex valued) matrix"""
-    e, u = ans  # eigenvalues as 1d array, eigenvectors in columns
-    n = e.shape[-1]
-
-    def vjp(g):
-        ge, gu = g
-        ge = _matrix_diag(ge)
-        f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
-        f -= _diag(f)
-        ut = anp.swapaxes(u, -1, -2)
-        r1 = f * _dot(ut, gu)
-        r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
-        r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
-        if not anp.iscomplexobj(x):
-            r = anp.real(r)
-            # the derivative is still complex for real input (imaginary delta is allowed), real output
-            # but the derivative should be real in real input case when imaginary delta is forbidden
-        return r
-    return vjp
-
-
-defvjp(anp.linalg.eig, grad_eig)
-# End of the code block from autograd.master
+def matmul(x1, x2):
+    if isinstance(x1[0, 0], CObs) or isinstance(x2[0, 0], CObs):
+        Lr, Li = np.vectorize(lambda x: (np.real(x), np.imag(x)))(x1)
+        Rr, Ri = np.vectorize(lambda x: (np.real(x), np.imag(x)))(x2)
+        Nr = derived_array(lambda x: x[0] @ x[2] - x[1] @ x[3], [Lr, Li, Rr, Ri])
+        Ni = derived_array(lambda x: x[0] @ x[3] + x[1] @ x[2], [Lr, Li, Rr, Ri])
+        return (1 + 0j) * Nr + 1j * Ni
+    else:
+        return derived_array(lambda x: x[0] @ x[1], [x1, x2])
+
+
+def inv(x):
+    return mat_mat_op(anp.linalg.inv, x)
+
+
+def cholesky(x):
+    return mat_mat_op(anp.linalg.cholesky, x)
+
+
 def scalar_mat_op(op, obs, **kwargs):
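For CObs matrices, matmul splits both factors into real and imaginary parts and uses (A + iB)(C + iD) = (AC - BD) + i(AD + BC), so only real-valued derived_array calls are needed. A plain-numpy check of that identity (the random matrices are illustrative):

    import numpy as np

    rng = np.random.default_rng(0)
    A, B, C, D = (rng.standard_normal((3, 3)) for _ in range(4))
    direct = (A + 1j * B) @ (C + 1j * D)
    split = (A @ C - B @ D) + 1j * (A @ D + B @ C)
    assert np.allclose(direct, split)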
@@ -107,7 +193,7 @@ def mat_mat_op(op, obs, **kwargs):
     else:
         if kwargs.get('num_grad') is True:
             return _num_diff_mat_mat_op(op, obs, **kwargs)
-        return derived_observable(lambda x, **kwargs: op(x), obs)
+        return derived_array(lambda x, **kwargs: op(x), [obs])[0]


 def eigh(obs, **kwargs):
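mat_mat_op now hands the matrix to derived_array wrapped in a one-element list, so the input array carries a leading batch axis of length one; anp.linalg.inv and anp.linalg.cholesky act on the last two axes and broadcast over the rest, and the trailing [0] strips the batch axis again. A plain-numpy sketch of the broadcasting behaviour this relies on (values are illustrative):

    import numpy as np

    stack = np.array([[[2.0, 0.0], [0.0, 4.0]]])   # shape (1, 2, 2): one matrix in a batch
    inv_stack = np.linalg.inv(stack)               # inverts along the last two axes
    assert inv_stack.shape == (1, 2, 2)
    assert np.allclose(inv_stack[0], np.diag([0.5, 0.25]))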
@@ -375,3 +461,49 @@ def _num_diff_svd(obs, **kwargs):
         res_mat2.append(row)

     return (np.array(res_mat0) @ np.identity(mid_index), np.array(res_mat1) @ np.identity(mid_index), np.array(res_mat2) @ np.identity(shape[1]))
+
+
+_dot = partial(anp.einsum, '...ij,...jk->...ik')
+
+
+# batched diag
+def _diag(a):
+    return anp.eye(a.shape[-1]) * a
+
+
+# batched diagonal, similar to matrix_diag in tensorflow
+def _matrix_diag(a):
+    reps = anp.array(a.shape)
+    reps[:-1] = 1
+    reps[-1] = a.shape[-1]
+    newshape = list(a.shape) + [a.shape[-1]]
+    return _diag(anp.tile(a, reps).reshape(newshape))
+
+
+# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
+# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
+def grad_eig(ans, x):
+    """Gradient of a general square (complex valued) matrix"""
+    e, u = ans  # eigenvalues as 1d array, eigenvectors in columns
+    n = e.shape[-1]
+
+    def vjp(g):
+        ge, gu = g
+        ge = _matrix_diag(ge)
+        f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
+        f -= _diag(f)
+        ut = anp.swapaxes(u, -1, -2)
+        r1 = f * _dot(ut, gu)
+        r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
+        r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
+        if not anp.iscomplexobj(x):
+            r = anp.real(r)
+            # the derivative is still complex for real input (imaginary delta is allowed), real output
+            # but the derivative should be real in real input case when imaginary delta is forbidden
+        return r
+    return vjp


+defvjp(anp.linalg.eig, grad_eig)
+# End of the code block from autograd.master
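The test updates below simply swap explicit mat_mat_op calls for the new wrappers; since inv and cholesky are thin aliases for mat_mat_op with the corresponding autograd function, the two routes should yield identical Obs. A hedged sketch of that check (the matrix construction is illustrative and assumes the Obs constructor used in the tests below):

    import numpy as np
    import autograd.numpy as anp
    import pyerrors as pe

    matrix = np.array([pe.Obs([np.random.rand(500) + 1.0], ['ens']) for _ in range(4)]).reshape((2, 2))

    via_wrapper = pe.linalg.inv(matrix)
    via_mat_mat_op = pe.linalg.mat_mat_op(anp.linalg.inv, matrix)

    for (i, j), entry in np.ndenumerate(via_wrapper - via_mat_mat_op):
        assert entry.is_zero()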
@@ -1,4 +1,5 @@
-import autograd.numpy as np
+import numpy as np
+import autograd.numpy as anp
 import math
 import pyerrors as pe
 import pytest
@@ -6,6 +7,28 @@ import pytest
 np.random.seed(0)


+def test_matmul():
+    for dim in [4, 8]:
+        my_list = []
+        length = 1000 + np.random.randint(200)
+        for i in range(dim ** 2):
+            my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
+        my_array = np.array(my_list).reshape((dim, dim))
+        tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
+        for t, e in np.ndenumerate(tt):
+            assert e.is_zero(), t
+
+        my_list = []
+        length = 1000 + np.random.randint(200)
+        for i in range(dim ** 2):
+            my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
+                                   pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
+        my_array = np.array(my_list).reshape((dim, dim))
+        tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
+        for t, e in np.ndenumerate(tt):
+            assert e.is_zero(), t
+
+
 def test_matrix_inverse():
     content = []
     for t in range(9):
@@ -14,7 +37,7 @@ def test_matrix_inverse():

     content.append(1.0)  # Add 1.0 as a float
     matrix = np.diag(content)
-    inverse_matrix = pe.linalg.mat_mat_op(np.linalg.inv, matrix)
+    inverse_matrix = pe.linalg.inv(matrix)
     assert all([o.is_zero() for o in np.diag(matrix) * np.diag(inverse_matrix) - 1])
@@ -35,7 +58,7 @@ def test_complex_matrix_inverse():
         matrix[n, m] = entry.real.value + 1j * entry.imag.value

     inverse_matrix = np.linalg.inv(matrix)
-    inverse_obs_matrix = pe.linalg.mat_mat_op(np.linalg.inv, obs_matrix)
+    inverse_obs_matrix = pe.linalg.inv(obs_matrix)
     for (n, m), entry in np.ndenumerate(inverse_matrix):
         assert np.isclose(inverse_matrix[n, m].real, inverse_obs_matrix[n, m].real.value)
         assert np.isclose(inverse_matrix[n, m].imag, inverse_obs_matrix[n, m].imag.value)
@@ -53,7 +76,7 @@ def test_matrix_functions():
     matrix = np.array(matrix) @ np.identity(dim)

     # Check inverse of matrix
-    inv = pe.linalg.mat_mat_op(np.linalg.inv, matrix)
+    inv = pe.linalg.inv(matrix)
     check_inv = matrix @ inv

     for (i, j), entry in np.ndenumerate(check_inv):
@@ -66,7 +89,7 @@ def test_matrix_functions():

     # Check Cholesky decomposition
     sym = np.dot(matrix, matrix.T)
-    cholesky = pe.linalg.mat_mat_op(np.linalg.cholesky, sym)
+    cholesky = pe.linalg.cholesky(sym)
     check = cholesky @ cholesky.T

     for (i, j), entry in np.ndenumerate(check):