From 3198088f9cf2c99f0d6e2df1453d7749d2a85f2d Mon Sep 17 00:00:00 2001
From: s-kuberski <simon.kuberski@uni-muenster.de>
Date: Tue, 16 May 2023 19:29:13 +0200
Subject: [PATCH] Feat/flow (#176)

* fix: String conversion of Obs can now handle a dvalue that is NaN or inf

* Feat: Added extraction of w0/a from openQCD files

* Removed unnecessary round in w0 routine

* Improved error handling in fit_t0

* Allowed to change the reference flow time in t0 and w0 determinations.

* Added doc string to fit_t0
---
 pyerrors/input/misc.py    |  44 +++++++++-
 pyerrors/input/openQCD.py | 178 ++++++++++++++++++++++++++++++++++----
 tests/openQCD_in_test.py  |  11 +++
 3 files changed, 216 insertions(+), 17 deletions(-)

diff --git a/pyerrors/input/misc.py b/pyerrors/input/misc.py
index b6cbccf7..c62f502c 100644
--- a/pyerrors/input/misc.py
+++ b/pyerrors/input/misc.py
@@ -2,6 +2,7 @@ import os
 import fnmatch
 import re
 import struct
+import warnings
 import numpy as np  # Thinly-wrapped numpy
 import matplotlib.pyplot as plt
 from matplotlib import gridspec
@@ -9,16 +10,52 @@ from ..obs import Obs
 from ..fits import fit_lin
 
 
-def fit_t0(t2E_dict, fit_range, plot_fit=False):
+def fit_t0(t2E_dict, fit_range, plot_fit=False, observable='t0'):
+    """Compute the root of (flow-based) data based on a dictionary that contains
+    the necessary information in key-value pairs a la (flow time: observable at flow time).
+
+    It is assumed that the data is monotonically increasing and passes zero from below.
+    No exception is thrown if this is not the case (several roots, no monotonic increase).
+    An exception is thrown if no root can be found in the data.
+
+    A linear fit in the vicinity of the root is performed to exctract the root from the
+    two fit parameters.
+
+    Parameters
+    ----------
+    t2E_dict : dict
+        Dictionary with pairs of (flow time: observable at flow time) where the flow times
+        are of type float and the observables of type Obs.
+    fit_range : int
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit.
+    plot_fit : bool
+        If true, the fit for the extraction of t0 is shown together with the data. (Default: False)
+    observable: str
+        Keyword to identify the observable to print the correct ylabel (if plot_fit is True)
+        for the observables 't0' and 'w0'. No y label is printed otherwise. (Default: 't0')
+
+    Returns
+    -------
+    root : Obs
+        The root of the data series.
+    """
+
     zero_crossing = np.argmax(np.array(
         [o.value for o in t2E_dict.values()]) > 0.0)
 
+    if zero_crossing == 0:
+        raise Exception('Desired flow time not in data')
+
     x = list(t2E_dict.keys())[zero_crossing - fit_range:
                               zero_crossing + fit_range]
     y = list(t2E_dict.values())[zero_crossing - fit_range:
                                 zero_crossing + fit_range]
     [o.gamma_method() for o in y]
 
+    if len(x) < 2 * fit_range:
+        warnings.warn('Fit range smaller than expected! Fitting from %1.2e to %1.2e' % (x[0], x[-1]))
+
     fit_result = fit_lin(x, y)
 
     if plot_fit is True:
@@ -38,7 +75,10 @@ def fit_t0(t2E_dict, fit_range, plot_fit=False):
         ylim = ax0.get_ylim()
         ax0.fill_betweenx(ylim, x1=retval.value - retval.dvalue, x2=retval.value + retval.dvalue, color='gray', alpha=0.4)
         ax0.set_ylim(ylim)
-        ax0.set_ylabel(r'$t^2 \langle E(t) \rangle - 0.3 $')
+        if observable == 't0':
+            ax0.set_ylabel(r'$t^2 \langle E(t) \rangle - 0.3 $')
+        elif observable == 'w0':
+            ax0.set_ylabel(r'$t d(t^2 \langle E(t) \rangle)/dt - 0.3 $')
         xlim = ax0.get_xlim()
 
         fit_res = [fit_result[0] + fit_result[1] * xi for xi in x]
diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py
index 3712f3c5..7897c1de 100644
--- a/pyerrors/input/openQCD.py
+++ b/pyerrors/input/openQCD.py
@@ -229,14 +229,12 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
     return result
 
 
-def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfix='ms', **kwargs):
-    """Extract t0 from given .ms.dat files. Returns t0 as Obs.
+def _extract_flowed_energy_density(path, prefix, dtr_read, xmin, spatial_extent, postfix='ms', **kwargs):
+    """Extract a dictionary with the flowed Yang-Mills action density from given .ms.dat files.
+    Returns a dictionary with Obs as values and flow times as keys.
 
     It is assumed that all boundary effects have
     sufficiently decayed at x0=xmin.
-    The data around the zero crossing of t^2<E> - 0.3
-    is fitted with a linear function
-    from which the exact root is extracted.
 
     It is assumed that one measurement is performed for each config.
     If this is not the case, the resulting idl, as well as the handling
@@ -258,9 +256,6 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
         effects have sufficiently decayed.
     spatial_extent : int
         spatial extent of the lattice, required for normalization.
-    fit_range : int
-        Number of data points left and right of the zero
-        crossing to be included in the linear fit. (Default: 5)
     postfix : str
         Postfix of measurement file (Default: ms)
     r_start : list
@@ -278,8 +273,6 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
     files : list
         list which contains the filenames to be read. No automatic detection of
         files performed if given.
-    plot_fit : bool
-        If true, the fit for the extraction of t0 is shown together with the data.
     assume_thermalization : bool
         If True: If the first record divided by the distance between two measurements is larger than
         1, it is assumed that this is due to thermalization and the first measurement belongs
@@ -288,8 +281,8 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
 
     Returns
     -------
-    t0 : Obs
-        Extracted t0
+    E_dict : dictionary
+        Dictionary with the flowed action density at flow times t
     """
 
     if 'files' in kwargs:
@@ -321,7 +314,7 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
     else:
         r_step = 1
 
-    print('Extract t0 from', prefix, ',', replica, 'replica')
+    print('Extract flowed Yang-Mills action density from', prefix, ',', replica, 'replica')
 
     if 'names' in kwargs:
         rep_names = kwargs.get('names')
@@ -415,7 +408,7 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
         warnings.warn('Stepsize between configurations is greater than one!' + str(stepsizes), RuntimeWarning)
 
     idl = [range(configlist[rep][r_start_index[rep]], configlist[rep][r_stop_index[rep]] + 1, r_step) for rep in range(replica)]
-    t2E_dict = {}
+    E_dict = {}
     for n in range(nn + 1):
         samples = []
         for nrep, rep in enumerate(Ysum):
@@ -424,11 +417,166 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfi
                 samples[-1].append(cnfg[n])
             samples[-1] = samples[-1][r_start_index[nrep]:r_stop_index[nrep] + 1][::r_step]
         new_obs = Obs(samples, rep_names, idl=idl)
-        t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3
+        E_dict[n * dn * eps] = new_obs / (spatial_extent ** 3)
+
+    return E_dict
+
+
+def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfix='ms', c=0.3, **kwargs):
+    """Extract t0/a^2 from given .ms.dat files. Returns t0 as Obs.
+
+    It is assumed that all boundary effects have
+    sufficiently decayed at x0=xmin.
+    The data around the zero crossing of t^2<E> - c (where c=0.3 by default)
+    is fitted with a linear function
+    from which the exact root is extracted.
+
+    It is assumed that one measurement is performed for each config.
+    If this is not the case, the resulting idl, as well as the handling
+    of r_start, r_stop and r_step is wrong and the user has to correct
+    this in the resulting observable.
+
+    Parameters
+    ----------
+    path : str
+        Path to .ms.dat files
+    prefix : str
+        Ensemble prefix
+    dtr_read : int
+        Determines how many trajectories should be skipped
+        when reading the ms.dat files.
+        Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
+    xmin : int
+        First timeslice where the boundary
+        effects have sufficiently decayed.
+    spatial_extent : int
+        spatial extent of the lattice, required for normalization.
+    fit_range : int
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit. (Default: 5)
+    postfix : str
+        Postfix of measurement file (Default: ms)
+    c: float
+        Constant that defines the flow scale. Default 0.3 for t_0, choose 2./3 for t_1.
+    r_start : list
+        list which contains the first config to be read for each replicum.
+    r_stop : list
+        list which contains the last config to be read for each replicum.
+    r_step : int
+        integer that defines a fixed step size between two measurements (in units of configs)
+        If not given, r_step=1 is assumed.
+    plaquette : bool
+        If true extract the plaquette estimate of t0 instead.
+    names : list
+        list of names that is assigned to the data according according
+        to the order in the file list. Use careful, if you do not provide file names!
+    files : list
+        list which contains the filenames to be read. No automatic detection of
+        files performed if given.
+    plot_fit : bool
+        If true, the fit for the extraction of t0 is shown together with the data.
+    assume_thermalization : bool
+        If True: If the first record divided by the distance between two measurements is larger than
+        1, it is assumed that this is due to thermalization and the first measurement belongs
+        to the first config (default).
+        If False: The config numbers are assumed to be traj_number // difference
+
+    Returns
+    -------
+    t0 : Obs
+        Extracted t0
+    """
+
+    E_dict = _extract_flowed_energy_density(path, prefix, dtr_read, xmin, spatial_extent, postfix, **kwargs)
+    t2E_dict = {}
+    for t in sorted(E_dict.keys()):
+        t2E_dict[t] = t ** 2 * E_dict[t] - c
 
     return fit_t0(t2E_dict, fit_range, plot_fit=kwargs.get('plot_fit'))
 
 
+def extract_w0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, postfix='ms', c=0.3, **kwargs):
+    """Extract w0/a from given .ms.dat files. Returns w0 as Obs.
+
+    It is assumed that all boundary effects have
+    sufficiently decayed at x0=xmin.
+    The data around the zero crossing of t d(t^2<E>)/dt -  (where c=0.3 by default)
+    is fitted with a linear function
+    from which the exact root is extracted.
+
+    It is assumed that one measurement is performed for each config.
+    If this is not the case, the resulting idl, as well as the handling
+    of r_start, r_stop and r_step is wrong and the user has to correct
+    this in the resulting observable.
+
+    Parameters
+    ----------
+    path : str
+        Path to .ms.dat files
+    prefix : str
+        Ensemble prefix
+    dtr_read : int
+        Determines how many trajectories should be skipped
+        when reading the ms.dat files.
+        Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
+    xmin : int
+        First timeslice where the boundary
+        effects have sufficiently decayed.
+    spatial_extent : int
+        spatial extent of the lattice, required for normalization.
+    fit_range : int
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit. (Default: 5)
+    postfix : str
+        Postfix of measurement file (Default: ms)
+    c: float
+        Constant that defines the flow scale. Default 0.3 for w_0, choose 2./3 for w_1.
+    r_start : list
+        list which contains the first config to be read for each replicum.
+    r_stop : list
+        list which contains the last config to be read for each replicum.
+    r_step : int
+        integer that defines a fixed step size between two measurements (in units of configs)
+        If not given, r_step=1 is assumed.
+    plaquette : bool
+        If true extract the plaquette estimate of w0 instead.
+    names : list
+        list of names that is assigned to the data according according
+        to the order in the file list. Use careful, if you do not provide file names!
+    files : list
+        list which contains the filenames to be read. No automatic detection of
+        files performed if given.
+    plot_fit : bool
+        If true, the fit for the extraction of w0 is shown together with the data.
+    assume_thermalization : bool
+        If True: If the first record divided by the distance between two measurements is larger than
+        1, it is assumed that this is due to thermalization and the first measurement belongs
+        to the first config (default).
+        If False: The config numbers are assumed to be traj_number // difference
+
+    Returns
+    -------
+    w0 : Obs
+        Extracted w0
+    """
+
+    E_dict = _extract_flowed_energy_density(path, prefix, dtr_read, xmin, spatial_extent, postfix, **kwargs)
+
+    ftimes = sorted(E_dict.keys())
+
+    t2E_dict = {}
+    for t in ftimes:
+        t2E_dict[t] = t ** 2 * E_dict[t]
+
+    tdtt2E_dict = {}
+    tdtt2E_dict[ftimes[0]] = ftimes[0] * (t2E_dict[ftimes[1]] - t2E_dict[ftimes[0]]) / (ftimes[1] - ftimes[0]) - c
+    for i in range(1, len(ftimes) - 1):
+        tdtt2E_dict[ftimes[i]] = ftimes[i] * (t2E_dict[ftimes[i + 1]] - t2E_dict[ftimes[i - 1]]) / (ftimes[i + 1] - ftimes[i - 1]) - c
+    tdtt2E_dict[ftimes[-1]] = ftimes[-1] * (t2E_dict[ftimes[-1]] - t2E_dict[ftimes[-2]]) / (ftimes[-1] - ftimes[-2]) - c
+
+    return np.sqrt(fit_t0(tdtt2E_dict, fit_range, plot_fit=kwargs.get('plot_fit'), observable='w0'))
+
+
 def _parse_array_openQCD2(d, n, size, wa, quadrupel=False):
     arr = []
     if d == 2:
diff --git a/tests/openQCD_in_test.py b/tests/openQCD_in_test.py
index 35da5817..dd9f4272 100644
--- a/tests/openQCD_in_test.py
+++ b/tests/openQCD_in_test.py
@@ -54,6 +54,7 @@ def test_rwms():
     files = ['openqcd2r1.ms.dat']
     names = ['openqcd2|r1']
     t0 = pe.input.openQCD.extract_t0(path, '', dtr_read=3, xmin=0, spatial_extent=4, files=files, names=names, fit_range=2)
+    assert(np.isclose(t0.value, 0.3816208266076627))
     t0 = pe.input.openQCD.extract_t0(path, prefix, dtr_read=3, xmin=0, spatial_extent=4, r_start=[1])
     repname = list(rwfo[0].idl.keys())[0]
     assert(t0.idl[repname] == range(1, 10))
@@ -64,6 +65,16 @@ def test_rwms():
 
     pe.input.openQCD.extract_t0(path, '', dtr_read=3, xmin=0, spatial_extent=4, files=files, names=names, fit_range=2, plot_fit=True)
 
+    with pytest.raises(Exception):
+        pe.input.openQCD.extract_t0(path, '', dtr_read=3, xmin=0, spatial_extent=4, files=files, names=names, fit_range=2, c=14)
+    # w0
+
+    w0 = pe.input.openQCD.extract_w0(path, '', dtr_read=3, xmin=0, spatial_extent=4, files=files, names=names, fit_range=2, plot_fit=True)
+    assert(np.isclose(w0.value, 0.5220124285820434))
+
+    with pytest.raises(Exception):
+        pe.input.openQCD.extract_w0(path, '', dtr_read=3, xmin=0, spatial_extent=4, files=files, names=names, fit_range=2, c=14)
+
 
 def test_Qtop():
     path = './tests//data/openqcd_test/'