pyerrors.jackknifing
View Source
#!/usr/bin/env python # coding: utf-8 import pickle import matplotlib.pyplot as plt import numpy as np def _jack_error(jack): n = jack.size mean = np.mean(jack) error = 0 for i in range(n): error += (jack[i] - mean) ** 2 return np.sqrt((n - 1) / n * error) class Jack: def __init__(self, value, jacks): self.jacks = jacks self.N = list(map(np.size, self.jacks)) self.max_binsize = len(self.N) self.value = value # list(map(np.mean, self.jacks)) self.dvalue = list(map(_jack_error, self.jacks)) def print(self, **kwargs): """Print basic properties of the Jack.""" if 'binsize' in kwargs: b = kwargs.get('binsize') - 1 if b == -1: b = 0 if not isinstance(b, int): raise TypeError('binsize has to be integer') if b + 1 > self.max_binsize: raise Exception('Chosen binsize not calculated') else: b = 0 print('Result:\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue[b], self.dvalue[b] * np.sqrt(2 * b / self.N[0]), np.abs(self.dvalue[b] / self.value * 100))) def plot_tauint(self): plt.xlabel('binsize') plt.ylabel('tauint') length = self.max_binsize x = np.arange(length) + 1 plt.errorbar(x[:], (self.dvalue[:] / self.dvalue[0]) ** 2 / 2, yerr=np.sqrt(((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 * x[:] / self.N[0])) / 2) ** 2 + ((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 / self.N[0])) / 2) ** 2), linewidth=1, capsize=2) plt.xlim(0.5, length + 0.5) plt.title('Tauint') plt.show() def plot_history(self): N = self.N x = np.arange(N) tmp = [] for i in range(self.replicas): tmp.append(self.deltas[i] + self.r_values[i]) y = np.concatenate(tmp, axis=0) # Think about including kwarg to look only at some replica plt.errorbar(x, y, fmt='.', markersize=3) plt.xlim(-0.5, N - 0.5) plt.show() def dump(self, name, **kwargs): """Dump the Jack to a pickle file 'name'. Keyword arguments: path -- specifies a custom path for the file (default '.') """ if 'path' in kwargs: file_name = kwargs.get('path') + '/' + name + '.p' else: file_name = name + '.p' with open(file_name, 'wb') as fb: pickle.dump(self, fb) def generate_jack(obs, **kwargs): full_data = [] for r, name in enumerate(obs.names): if r == 0: full_data = obs.deltas[name] + obs.r_values[name] else: full_data = np.append(full_data, obs.deltas[name] + obs.r_values[name]) jacks = [] if 'max_binsize' in kwargs: max_b = kwargs.get('max_binsize') if not isinstance(max_b, int): raise TypeError('max_binsize has to be integer') else: max_b = 1 for b in range(max_b): # binning if necessary if b > 0: n = full_data.size // (b + 1) binned_data = np.zeros(n) for i in range(n): for j in range(b + 1): binned_data[i] += full_data[i * (b + 1) + j] binned_data[i] /= (b + 1) else: binned_data = full_data n = binned_data.size # generate jacks from data mean = np.mean(binned_data) tmp_jacks = np.zeros(n) for i in range(n): tmp_jacks[i] = (n * mean - binned_data[i]) / (n - 1) jacks.append(tmp_jacks) # Value is not correctly reproduced here return Jack(obs.value, jacks) def derived_jack(func, data, **kwargs): """Construct a derived Jack according to func(data, **kwargs). Parameters ---------- func -- arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as np'). data -- list of Jacks, e.g. [jack1, jack2, jack3]. Notes ----- For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two jacks one can e.g. use new_jack = derived_jack(lambda x : x[0] / x[1], [jack1, jack2]) """ # Check shapes of data if not all(x.N == data[0].N for x in data): raise Exception('Error: Shape of data does not fit') values = np.zeros(len(data)) for j, item in enumerate(data): values[j] = item.value new_value = func(values, **kwargs) jacks = [] for b in range(data[0].max_binsize): tmp_jacks = np.zeros(data[0].N[b]) for i in range(data[0].N[b]): values = np.zeros(len(data)) for j, item in enumerate(data): values[j] = item.jacks[b][i] tmp_jacks[i] = func(values, **kwargs) jacks.append(tmp_jacks) return Jack(new_value, jacks)
View Source
class Jack: def __init__(self, value, jacks): self.jacks = jacks self.N = list(map(np.size, self.jacks)) self.max_binsize = len(self.N) self.value = value # list(map(np.mean, self.jacks)) self.dvalue = list(map(_jack_error, self.jacks)) def print(self, **kwargs): """Print basic properties of the Jack.""" if 'binsize' in kwargs: b = kwargs.get('binsize') - 1 if b == -1: b = 0 if not isinstance(b, int): raise TypeError('binsize has to be integer') if b + 1 > self.max_binsize: raise Exception('Chosen binsize not calculated') else: b = 0 print('Result:\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue[b], self.dvalue[b] * np.sqrt(2 * b / self.N[0]), np.abs(self.dvalue[b] / self.value * 100))) def plot_tauint(self): plt.xlabel('binsize') plt.ylabel('tauint') length = self.max_binsize x = np.arange(length) + 1 plt.errorbar(x[:], (self.dvalue[:] / self.dvalue[0]) ** 2 / 2, yerr=np.sqrt(((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 * x[:] / self.N[0])) / 2) ** 2 + ((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 / self.N[0])) / 2) ** 2), linewidth=1, capsize=2) plt.xlim(0.5, length + 0.5) plt.title('Tauint') plt.show() def plot_history(self): N = self.N x = np.arange(N) tmp = [] for i in range(self.replicas): tmp.append(self.deltas[i] + self.r_values[i]) y = np.concatenate(tmp, axis=0) # Think about including kwarg to look only at some replica plt.errorbar(x, y, fmt='.', markersize=3) plt.xlim(-0.5, N - 0.5) plt.show() def dump(self, name, **kwargs): """Dump the Jack to a pickle file 'name'. Keyword arguments: path -- specifies a custom path for the file (default '.') """ if 'path' in kwargs: file_name = kwargs.get('path') + '/' + name + '.p' else: file_name = name + '.p' with open(file_name, 'wb') as fb: pickle.dump(self, fb)
View Source
def __init__(self, value, jacks): self.jacks = jacks self.N = list(map(np.size, self.jacks)) self.max_binsize = len(self.N) self.value = value # list(map(np.mean, self.jacks)) self.dvalue = list(map(_jack_error, self.jacks))
View Source
def print(self, **kwargs): """Print basic properties of the Jack.""" if 'binsize' in kwargs: b = kwargs.get('binsize') - 1 if b == -1: b = 0 if not isinstance(b, int): raise TypeError('binsize has to be integer') if b + 1 > self.max_binsize: raise Exception('Chosen binsize not calculated') else: b = 0 print('Result:\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue[b], self.dvalue[b] * np.sqrt(2 * b / self.N[0]), np.abs(self.dvalue[b] / self.value * 100)))
Print basic properties of the Jack.
View Source
def plot_tauint(self): plt.xlabel('binsize') plt.ylabel('tauint') length = self.max_binsize x = np.arange(length) + 1 plt.errorbar(x[:], (self.dvalue[:] / self.dvalue[0]) ** 2 / 2, yerr=np.sqrt(((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 * x[:] / self.N[0])) / 2) ** 2 + ((2 * (self.dvalue[:] / self.dvalue[0]) ** 2 * np.sqrt(2 / self.N[0])) / 2) ** 2), linewidth=1, capsize=2) plt.xlim(0.5, length + 0.5) plt.title('Tauint') plt.show()
View Source
def plot_history(self): N = self.N x = np.arange(N) tmp = [] for i in range(self.replicas): tmp.append(self.deltas[i] + self.r_values[i]) y = np.concatenate(tmp, axis=0) # Think about including kwarg to look only at some replica plt.errorbar(x, y, fmt='.', markersize=3) plt.xlim(-0.5, N - 0.5) plt.show()
View Source
def dump(self, name, **kwargs): """Dump the Jack to a pickle file 'name'. Keyword arguments: path -- specifies a custom path for the file (default '.') """ if 'path' in kwargs: file_name = kwargs.get('path') + '/' + name + '.p' else: file_name = name + '.p' with open(file_name, 'wb') as fb: pickle.dump(self, fb)
Dump the Jack to a pickle file 'name'.
Keyword arguments: path -- specifies a custom path for the file (default '.')
View Source
def generate_jack(obs, **kwargs): full_data = [] for r, name in enumerate(obs.names): if r == 0: full_data = obs.deltas[name] + obs.r_values[name] else: full_data = np.append(full_data, obs.deltas[name] + obs.r_values[name]) jacks = [] if 'max_binsize' in kwargs: max_b = kwargs.get('max_binsize') if not isinstance(max_b, int): raise TypeError('max_binsize has to be integer') else: max_b = 1 for b in range(max_b): # binning if necessary if b > 0: n = full_data.size // (b + 1) binned_data = np.zeros(n) for i in range(n): for j in range(b + 1): binned_data[i] += full_data[i * (b + 1) + j] binned_data[i] /= (b + 1) else: binned_data = full_data n = binned_data.size # generate jacks from data mean = np.mean(binned_data) tmp_jacks = np.zeros(n) for i in range(n): tmp_jacks[i] = (n * mean - binned_data[i]) / (n - 1) jacks.append(tmp_jacks) # Value is not correctly reproduced here return Jack(obs.value, jacks)
View Source
def derived_jack(func, data, **kwargs): """Construct a derived Jack according to func(data, **kwargs). Parameters ---------- func -- arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as np'). data -- list of Jacks, e.g. [jack1, jack2, jack3]. Notes ----- For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two jacks one can e.g. use new_jack = derived_jack(lambda x : x[0] / x[1], [jack1, jack2]) """ # Check shapes of data if not all(x.N == data[0].N for x in data): raise Exception('Error: Shape of data does not fit') values = np.zeros(len(data)) for j, item in enumerate(data): values[j] = item.value new_value = func(values, **kwargs) jacks = [] for b in range(data[0].max_binsize): tmp_jacks = np.zeros(data[0].N[b]) for i in range(data[0].N[b]): values = np.zeros(len(data)) for j, item in enumerate(data): values[j] = item.jacks[b][i] tmp_jacks[i] = func(values, **kwargs) jacks.append(tmp_jacks) return Jack(new_value, jacks)
Construct a derived Jack according to func(data, **kwargs).
Parameters
- func -- arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work,: all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as np').
- data -- list of Jacks, e.g. [jack1, jack2, jack3].
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two jacks one can e.g. use
new_jack = derived_jack(lambda x : x[0] / x[1], [jack1, jack2])