From f1394bbde6013766b0b1a6fde9f6c9338430e17a Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Tue, 9 Nov 2021 10:27:50 +0000
Subject: [PATCH] jackknifing module removed

---
 pyerrors/jackknifing.py | 155 ----------------------------------------
 pyerrors/linalg.py      |   3 -
 pyerrors/misc.py        |   3 -
 pyerrors/mpm.py         |  74 +++----------------
 pyerrors/obs.py         |   3 -
 5 files changed, 11 insertions(+), 227 deletions(-)
 delete mode 100644 pyerrors/jackknifing.py

diff --git a/pyerrors/jackknifing.py b/pyerrors/jackknifing.py
deleted file mode 100644
index 2eadba82..00000000
--- a/pyerrors/jackknifing.py
+++ /dev/null
@@ -1,155 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-import pickle
-import matplotlib.pyplot as plt
-import numpy as np
-
-
-def _jack_error(jack):
-    n = jack.size
-    mean = np.mean(jack)
-    error = 0
-    for i in range(n):
-        error += (jack[i] - mean) ** 2
-
-    return np.sqrt((n - 1) / n * error)
-
-
-class Jack:
-
-    def __init__(self, value, jacks):
-        self.jacks = jacks
-        self.N = list(map(np.size, self.jacks))
-        self.max_binsize = len(self.N)
-        self.value = value  # list(map(np.mean, self.jacks))
-        self.dvalue = list(map(_jack_error, self.jacks))
-
-    def print(self, **kwargs):
-        """Print basic properties of the Jack."""
-
-        if 'binsize' in kwargs:
-            b = kwargs.get('binsize') - 1
-            if b == -1:
-                b = 0
-            if not isinstance(b, int):
-                raise TypeError('binsize has to be integer')
-            if b + 1 > self.max_binsize:
-                raise Exception('Chosen binsize not calculated')
-        else:
-            b = 0
-
-        print('Result:\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue[b], self.dvalue[b] * np.sqrt(2 * b / self.N[0]), np.abs(self.dvalue[b] / self.value * 100)))
-
-    def plot_tauint(self):
-        plt.xlabel('binsize')
-        plt.ylabel('tauint')
-        length = self.max_binsize
-        x = np.arange(length) + 1
-        plt.errorbar(x[:], (self.dvalue[:] / self.dvalue[0]) ** 2 / 2, yerr=np.sqrt(((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 * x[:] / self.N[0])) / 2) ** 2 + ((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 / self.N[0])) / 2) ** 2), linewidth=1, capsize=2)
-        plt.xlim(0.5, length + 0.5)
-        plt.title('Tauint')
-        plt.show()
-
-    def plot_history(self):
-        N = self.N
-        x = np.arange(N)
-        tmp = []
-        for i in range(self.replicas):
-            tmp.append(self.deltas[i] + self.r_values[i])
-        y = np.concatenate(tmp, axis=0)  # Think about including kwarg to look only at some replica
-        plt.errorbar(x, y, fmt='.', markersize=3)
-        plt.xlim(-0.5, N - 0.5)
-        plt.show()
-
-    def dump(self, name, **kwargs):
-        """Dump the Jack to a pickle file 'name'.
-
-        Keyword arguments:
-        path -- specifies a custom path for the file (default '.')
-        """
-        if 'path' in kwargs:
-            file_name = kwargs.get('path') + '/' + name + '.p'
-        else:
-            file_name = name + '.p'
-        with open(file_name, 'wb') as fb:
-            pickle.dump(self, fb)
-
-
-def generate_jack(obs, **kwargs):
-    full_data = []
-    for r, name in enumerate(obs.names):
-        if r == 0:
-            full_data = obs.deltas[name] + obs.r_values[name]
-        else:
-            full_data = np.append(full_data, obs.deltas[name] + obs.r_values[name])
-
-    jacks = []
-    if 'max_binsize' in kwargs:
-        max_b = kwargs.get('max_binsize')
-        if not isinstance(max_b, int):
-            raise TypeError('max_binsize has to be integer')
-    else:
-        max_b = 1
-
-    for b in range(max_b):
-        # binning if necessary
-        if b > 0:
-            n = full_data.size // (b + 1)
-            binned_data = np.zeros(n)
-            for i in range(n):
-                for j in range(b + 1):
-                    binned_data[i] += full_data[i * (b + 1) + j]
-                binned_data[i] /= (b + 1)
-        else:
-            binned_data = full_data
-            n = binned_data.size
-        # generate jacks from data
-        mean = np.mean(binned_data)
-        tmp_jacks = np.zeros(n)
-        for i in range(n):
-            tmp_jacks[i] = (n * mean - binned_data[i]) / (n - 1)
-        jacks.append(tmp_jacks)
-
-        # Value is not correctly reproduced here
-    return Jack(obs.value, jacks)
-
-
-def derived_jack(func, data, **kwargs):
-    """Construct a derived Jack according to func(data, **kwargs).
-
-    Parameters
-    ----------
-    func -- arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work,
-            all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as np').
-    data -- list of Jacks, e.g. [jack1, jack2, jack3].
-
-    Notes
-    -----
-    For simple mathematical operations it can be practical to use anonymous functions.
-    For the ratio of two jacks one can e.g. use
-
-    new_jack = derived_jack(lambda x : x[0] / x[1], [jack1, jack2])
-
-    """
-
-    # Check shapes of data
-    if not all(x.N == data[0].N for x in data):
-        raise Exception('Error: Shape of data does not fit')
-
-    values = np.zeros(len(data))
-    for j, item in enumerate(data):
-        values[j] = item.value
-    new_value = func(values, **kwargs)
-
-    jacks = []
-    for b in range(data[0].max_binsize):
-        tmp_jacks = np.zeros(data[0].N[b])
-        for i in range(data[0].N[b]):
-            values = np.zeros(len(data))
-            for j, item in enumerate(data):
-                values[j] = item.jacks[b][i]
-            tmp_jacks[i] = func(values, **kwargs)
-        jacks.append(tmp_jacks)
-
-    return Jack(new_value, jacks)
diff --git a/pyerrors/linalg.py b/pyerrors/linalg.py
index bc265efa..210f79fa 100644
--- a/pyerrors/linalg.py
+++ b/pyerrors/linalg.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-
 import numpy as np
 from autograd import jacobian
 import autograd.numpy as anp  # Thinly-wrapped numpy
diff --git a/pyerrors/misc.py b/pyerrors/misc.py
index 7fd0af58..1baa5e53 100644
--- a/pyerrors/misc.py
+++ b/pyerrors/misc.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-
 import numpy as np
 from .obs import Obs
 
diff --git a/pyerrors/mpm.py b/pyerrors/mpm.py
index 00ace703..619d3750 100644
--- a/pyerrors/mpm.py
+++ b/pyerrors/mpm.py
@@ -1,26 +1,26 @@
-#!/usr/bin/env python
-# coding: utf-8
-
 import numpy as np
 import scipy.linalg
 from .obs import Obs
-from .linalg import svd, eig, pinv
+from .linalg import svd, eig
 
 
 def matrix_pencil_method(corrs, k=1, p=None, **kwargs):
-    """ Matrix pencil method to extract k energy levels from data
+    """Matrix pencil method to extract k energy levels from data
 
     Implementation of the matrix pencil method based on
     eq. (2.17) of Y. Hua, T. K. Sarkar, IEEE Trans. Acoust. 38, 814-824 (1990)
 
     Parameters
     ----------
-    data -- can be a list of Obs for the analysis of a single correlator, or a list of lists
-            of Obs if several correlators are to analyzed at once.
-    k -- Number of states to extract (default 1).
-    p -- matrix pencil parameter which filters noise. The optimal value is expected between
-         len(data)/3 and 2*len(data)/3. The computation is more expensive the closer p is
-         to len(data)/2 but could possibly suppress more noise (default len(data)//2).
+    data : list
+        can be a list of Obs for the analysis of a single correlator, or a list of lists
+        of Obs if several correlators are to analyzed at once.
+    k : int
+        Number of states to extract (default 1).
+    p : int
+        matrix pencil parameter which filters noise. The optimal value is expected between
+        len(data)/3 and 2*len(data)/3. The computation is more expensive the closer p is
+        to len(data)/2 but could possibly suppress more noise (default len(data)//2).
     """
     if isinstance(corrs[0], Obs):
         data = [corrs]
@@ -56,55 +56,3 @@ def matrix_pencil_method(corrs, k=1, p=None, **kwargs):
     # Return the sorted logarithms of the real eigenvalues as Obs
     energy_levels = np.log(np.abs(eig(z, **kwargs)))
     return sorted(energy_levels, key=lambda x: abs(x.value))
-
-
-def matrix_pencil_method_old(data, p, noise_level=None, verbose=1, **kwargs):
-    """ Older impleentation of the matrix pencil method with pencil p on given data to
-        extract energy levels.
-
-    Parameters
-    ----------
-    data -- lists of Obs, where the nth entry is considered to be the correlation function
-            at x0=n+offset.
-    p -- matrix pencil parameter which corresponds to the number of energy levels to extract.
-         higher values for p can help decreasing noise.
-    noise_level -- If this argument is not None an additional prefiltering via singular
-                   value decomposition is performed in which all singular values below 10^(-noise_level)
-                   times the largest singular value are discarded. This increases the computation time.
-    verbose -- if larger than zero details about the noise filtering are printed to stdout
-               (default 1)
-
-    """
-    n_data = len(data)
-    if n_data <= p:
-        raise Exception('The pencil p has to be smaller than the number of data samples.')
-
-    matrix = scipy.linalg.hankel(data[:n_data - p], data[n_data - p - 1:]) @ np.identity(p + 1)
-
-    if noise_level is not None:
-        u, s, vh = svd(matrix)
-
-        s_values = np.vectorize(lambda x: x.value)(s)
-        if verbose > 0:
-            print('Singular values: ', s_values)
-        digit = np.argwhere(s_values / s_values[0] < 10.0**(-noise_level))
-        if digit.size == 0:
-            digit = len(s_values)
-        else:
-            digit = int(digit[0])
-        if verbose > 0:
-            print('Consider only', digit, 'out of', len(s), 'singular values')
-
-        new_matrix = u[:, :digit] * s[:digit] @ vh[:digit, :]
-        y1 = new_matrix[:, :-1]
-        y2 = new_matrix[:, 1:]
-    else:
-        y1 = matrix[:, :-1]
-        y2 = matrix[:, 1:]
-
-    # Moore–Penrose pseudoinverse
-    pinv_y1 = pinv(y1)
-
-    e = eig((pinv_y1 @ y2), **kwargs)
-    energy_levels = -np.log(np.abs(e))
-    return sorted(energy_levels, key=lambda x: abs(x.value))
diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index 794e7287..87f40b58 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-
 import warnings
 import pickle
 import numpy as np