From 47a4b566176d014d824f896e442364aea0e42b14 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann <82444481+jkuhl-uni@users.noreply.github.com> Date: Mon, 22 May 2023 13:37:46 +0200 Subject: [PATCH 1/9] taking care of cols with only None values (#184) --- pyerrors/input/pandas.py | 30 +++++++++++++++++------------- tests/pandas_test.py | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 911c14d5..13482983 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False): if isinstance(df[column][0], bytes): if df[column][0].startswith(b"\x1f\x8b\x08\x00"): df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8')) - df = df.replace({r'^$': None}, regex=True) - i = 0 - while df[column][i] is None: - i += 1 - if isinstance(df[column][i], str): - if '"program":' in df[column][i][:20]: - df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None) - if auto_gamma is True: - if isinstance(df[column][i], list): - df[column].apply(lambda x: [o.gm() if o is not None else x for o in x]) - else: - df[column].apply(lambda x: x.gm() if x is not None else x) + + if not all([e is None for e in df[column]]): + df[column] = df[column].replace({r'^$': None}, regex=True) + i = 0 + while df[column][i] is None: + i += 1 + if isinstance(df[column][i], str): + if '"program":' in df[column][i][:20]: + df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None) + if auto_gamma is True: + if isinstance(df[column][i], list): + df[column].apply(lambda x: [o.gm() if o is not None else x for o in x]) + else: + df[column].apply(lambda x: x.gm() if x is not None else x) return df def _need_to_serialize(col): serialize = False i = 0 - while col[i] is None: + while i < len(col) and col[i] is None: i += 1 + if i == len(col): + 
return serialize if isinstance(col[i], (Obs, Corr)): serialize = True elif isinstance(col[i], list): diff --git a/tests/pandas_test.py b/tests/pandas_test.py index fdff4f10..3a02b97e 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path): assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:]) +def test_null_col_df_gzsql_export_import(tmp_path): + my_dict = {"int": 1, + "float": -0.01, + "Noneval": None, + "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"), + "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")} + my_df = pd.DataFrame([my_dict] * 4) + pe.input.pandas.to_sql(my_df, 'test', (tmp_path / 'test.db').as_posix(), gz=True) + reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', (tmp_path / 'test.db').as_posix(), auto_gamma=True) + assert np.all(reconstructed_df["int"] == my_df["int"]) + assert np.all(reconstructed_df["float"] == my_df["float"]) + assert np.all([e is None for e in reconstructed_df["Noneval"]]) + assert np.all(reconstructed_df["Obs1"] == my_df["Obs1"]) + assert np.all(reconstructed_df["Obs2"] == my_df["Obs2"]) + + def test_df_Corr(tmp_path): my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")]) From 0535af992d889568ea37ffae3c4a9d0434e60958 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann <82444481+jkuhl-uni@users.noreply.github.com> Date: Fri, 26 May 2023 14:01:52 +0200 Subject: [PATCH 2/9] Feat/files idl xsf (#185) * added kwrg idl to ms5_xsf_read * change output, warn user if expected idl not found --- pyerrors/input/openQCD.py | 47 ++++++++++++++++++++++++--------------- tests/openQCD_in_test.py | 33 ++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 7897c1de..36fa21b6 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -1163,6 +1163,8 @@ def read_ms5_xsf(path, prefix, qc, corr, sep="r", 
**kwargs): Additional keyword arguments. The following keyword arguments are recognized: - names (List[str]): A list of names to use for the replicas. + - files (List[str]): A list of files to read data from. + - idl (List[List[int]]): A list of idls per replicum, restricting data to the idls given. Returns ------- @@ -1211,7 +1213,8 @@ def read_ms5_xsf(path, prefix, qc, corr, sep="r", **kwargs): names.append(se.split(sep)[0] + "|r" + se.split(sep)[1]) else: names.append(prefix) - + if 'idl' in kwargs: + expected_idl = kwargs.get('idl') names = sorted(names) files = sorted(files) @@ -1254,33 +1257,41 @@ def read_ms5_xsf(path, prefix, qc, corr, sep="r", **kwargs): for t in range(tmax): realsamples[repnum].append([]) imagsamples[repnum].append([]) - + if 'idl' in kwargs: + left_idl = set(expected_idl[repnum]) while True: cnfgt = fp.read(chunksize) if not cnfgt: break asascii = struct.unpack(packstr, cnfgt) cnfg = asascii[0] - cnfgs[repnum].append(cnfg) + idl_wanted = True + if 'idl' in kwargs: + idl_wanted = (cnfg in expected_idl[repnum]) + left_idl = left_idl - set([cnfg]) + if idl_wanted: + cnfgs[repnum].append(cnfg) - if corr not in placesBB: - tmpcorr = asascii[1 + 2 * tmax * placesBI.index(corr):1 + 2 * tmax * placesBI.index(corr) + 2 * tmax] - else: - tmpcorr = asascii[1 + 2 * tmax * len(placesBI) + 2 * placesBB.index(corr):1 + 2 * tmax * len(placesBI) + 2 * placesBB.index(corr) + 2] + if corr not in placesBB: + tmpcorr = asascii[1 + 2 * tmax * placesBI.index(corr):1 + 2 * tmax * placesBI.index(corr) + 2 * tmax] + else: + tmpcorr = asascii[1 + 2 * tmax * len(placesBI) + 2 * placesBB.index(corr):1 + 2 * tmax * len(placesBI) + 2 * placesBB.index(corr) + 2] - corrres = [[], []] - for i in range(len(tmpcorr)): - corrres[i % 2].append(tmpcorr[i]) - for t in range(int(len(tmpcorr) / 2)): - realsamples[repnum][t].append(corrres[0][t]) - for t in range(int(len(tmpcorr) / 2)): - imagsamples[repnum][t].append(corrres[1][t]) + corrres = [[], []] + for i in 
range(len(tmpcorr)): + corrres[i % 2].append(tmpcorr[i]) + for t in range(int(len(tmpcorr) / 2)): + realsamples[repnum][t].append(corrres[0][t]) + for t in range(int(len(tmpcorr) / 2)): + imagsamples[repnum][t].append(corrres[1][t]) + if 'idl' in kwargs: + left_idl = list(left_idl) + if len(left_idl) > 0: + warnings.warn('Could not find idls ' + str(left_idl) + ' in replikum of file ' + file, UserWarning) repnum += 1 - - s = "Read correlator " + corr + " from " + str(repnum) + " replika with " + str(len(realsamples[0][t])) + s = "Read correlator " + corr + " from " + str(repnum) + " replika with idls" + str(realsamples[0][t]) for rep in range(1, repnum): - s += ", " + str(len(realsamples[rep][t])) - s += " samples" + s += ", " + str(realsamples[rep][t]) print(s) print("Asserted run parameters:\n T:", tmax, "kappa:", kappa, "csw:", csw, "dF:", dF, "zF:", zF, "bnd:", bnd) diff --git a/tests/openQCD_in_test.py b/tests/openQCD_in_test.py index dd9f4272..8122dcb8 100644 --- a/tests/openQCD_in_test.py +++ b/tests/openQCD_in_test.py @@ -121,7 +121,7 @@ def test_gf_coupling(): def test_read_ms5_xsf(): - path = './tests//data/openqcd_test/' + path = './tests/data/openqcd_test/' prefix = "ms5_xsf_T24L16" corr = "gA" qc = 'dd' @@ -145,6 +145,37 @@ def test_read_ms5_xsf(): pe.input.openQCD.read_ms5_xsf(path, prefix, qc, fcorr) +def test_read_ms5_xsf_idl(): + path = './tests/data/openqcd_test/' + prefix = "ms5_xsf_T24L16" + corr = "gA" + qc = 'dd' + + c = pe.input.openQCD.read_ms5_xsf(path, prefix, qc, corr, idl=[range(1, 6), range(1, 7), range(1, 8)]) + + assert c.real[12].names == ['ms5_xsf_T24L16|r1', 'ms5_xsf_T24L16|r2', 'ms5_xsf_T24L16|r3'] + + assert (c.real[12].shape['ms5_xsf_T24L16|r1'] == 5) + assert (c.real[12].shape['ms5_xsf_T24L16|r2'] == 6) + assert (c.real[12].shape['ms5_xsf_T24L16|r3'] == 7) + + assert (c.real[12].idl['ms5_xsf_T24L16|r1'] == range(1, 6)) + assert (c.real[12].idl['ms5_xsf_T24L16|r2'] == range(1, 7)) + assert (c.real[12].idl['ms5_xsf_T24L16|r3'] == 
range(1, 8)) + + c = pe.input.openQCD.read_ms5_xsf(path, prefix, qc, corr, idl=[range(1, 11, 2), range(1, 11, 2), range(1, 11, 2)]) + + assert c.real[12].names == ['ms5_xsf_T24L16|r1', 'ms5_xsf_T24L16|r2', 'ms5_xsf_T24L16|r3'] + + assert (c.real[12].shape['ms5_xsf_T24L16|r1'] == 5) + assert (c.real[12].shape['ms5_xsf_T24L16|r2'] == 5) + assert (c.real[12].shape['ms5_xsf_T24L16|r3'] == 5) + + assert (c.real[12].idl['ms5_xsf_T24L16|r1'] == range(1, 11, 2)) + assert (c.real[12].idl['ms5_xsf_T24L16|r2'] == range(1, 11, 2)) + assert (c.real[12].idl['ms5_xsf_T24L16|r3'] == range(1, 11, 2)) + + def test_find_files(): path = './tests/data/openqcd_test/' prefix = "ms5_xsf_T24L16" From 2acaf1b5db2887003e85bbd2334160565780b323 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 30 May 2023 12:52:10 +0100 Subject: [PATCH 3/9] tests: np.alltrue replaced by np.all. --- tests/obs_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/obs_test.py b/tests/obs_test.py index 084edd64..4bf923c8 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -1096,7 +1096,7 @@ def test_reduce_deltas(): for idx_new in idl: new = pe.obs._reduce_deltas(deltas, idx_old, idx_new) print(new) - assert(np.alltrue([float(i) for i in idx_new] == new)) + assert(np.all([float(i) for i in idx_new] == new)) def test_cobs_array(): From 02b57817ae06304c0e3fd6a69ac5dc02b4071f4f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 30 May 2023 14:03:01 +0100 Subject: [PATCH 4/9] fix: adjusted maximal value for rho in test_gamma_method_irregular. 
(#188) --- tests/obs_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/obs_test.py b/tests/obs_test.py index 4bf923c8..73bedd55 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -762,7 +762,7 @@ def test_gamma_method_irregular(): N = 15 for i in range(10): arr = np.random.normal(1, .2, size=N) - for rho in .1 * np.arange(20): + for rho in .05 * np.arange(20): carr = gen_autocorrelated_array(arr, rho) a = pe.Obs([carr], ['a']) a.gm() From b222b116f29777b57394c6079d4cc7830ce2ec27 Mon Sep 17 00:00:00 2001 From: s-kuberski Date: Tue, 30 May 2023 16:29:22 +0200 Subject: [PATCH 5/9] Bug fix for edge case in _compute_drho (#189) --- pyerrors/obs.py | 2 +- tests/obs_test.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index a49320a5..e00a3241 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -278,7 +278,7 @@ class Obs: def _compute_drho(i): tmp = (self.e_rho[e_name][i + 1:w_max] - + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 <= 0 else 2 * (i - w_max // 2):-1], + + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) diff --git a/tests/obs_test.py b/tests/obs_test.py index 73bedd55..d5d8d5e1 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -767,6 +767,17 @@ def test_gamma_method_irregular(): a = pe.Obs([carr], ['a']) a.gm() + arr = np.random.normal(1, .2, size=999) + carr = gen_autocorrelated_array(arr, .8) + o = pe.Obs([carr], ['test']) + o.gamma_method() + no = np.NaN * o + no.gamma_method() + o.idl['test'] = range(1, 1998, 2) + o.gamma_method() + no = np.NaN * o + no.gamma_method() + def test_irregular_gapped_dtauint(): my_idl = list(range(0, 5010, 10)) From be1d8b2290ebf60f38475242654b29d8f94f4a05 Mon Sep 17 00:00:00 
2001 From: Fabian Joswig Date: Wed, 31 May 2023 16:59:27 +0100 Subject: [PATCH 6/9] fix: Conversion of an array with ndim > 0 to a scalar deprecation fixed. (#186) --- pyerrors/covobs.py | 2 +- pyerrors/input/dobs.py | 2 +- pyerrors/obs.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyerrors/covobs.py b/pyerrors/covobs.py index 49953983..64d9d6ec 100644 --- a/pyerrors/covobs.py +++ b/pyerrors/covobs.py @@ -42,7 +42,7 @@ class Covobs: def errsq(self): """ Return the variance (= square of the error) of the Covobs """ - return float(np.dot(np.transpose(self.grad), np.dot(self.cov, self.grad))) + return np.dot(np.transpose(self.grad), np.dot(self.cov, self.grad)).item() def _set_cov(self, cov): """ Set the covariance matrix of the covobs diff --git a/pyerrors/input/dobs.py b/pyerrors/input/dobs.py index 4757baae..b8b005ff 100644 --- a/pyerrors/input/dobs.py +++ b/pyerrors/input/dobs.py @@ -850,7 +850,7 @@ def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=N for i in range(ncov): for o in obsl: if cname in o.covobs: - val = o.covobs[cname].grad[i] + val = o.covobs[cname].grad[i].item() if val != 0: ds += '%1.14e ' % (val) else: diff --git a/pyerrors/obs.py b/pyerrors/obs.py index e00a3241..66f137d5 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1521,7 +1521,7 @@ def _covariance_element(obs1, obs2): if e_name not in obs2.cov_names: continue - dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) + dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item() return dvalue From d339015cb33a63321aba03dda92851193e2daea9 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 31 May 2023 18:07:38 +0100 Subject: [PATCH 7/9] Fix Obs in f-strings without specifier (#190) * fix: Conversion of an array with ndim > 0 to a scalar deprecation fixed. 
* fix: adjusted maximal value for rho in test_gamma_method_irregular. * fix: obs in f-strings now work again when no specifier is provided. --- pyerrors/obs.py | 6 +++++- tests/obs_test.py | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 66f137d5..7d8f9911 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -694,8 +694,12 @@ class Obs: return _format_uncertainty(self.value, self._dvalue) def __format__(self, format_type): + if format_type == "": + significance = 2 + else: + significance = int(float(format_type.replace("+", "").replace("-", ""))) my_str = _format_uncertainty(self.value, self._dvalue, - significance=int(float(format_type.replace("+", "").replace("-", "")))) + significance=significance) for char in ["+", " "]: if format_type.startswith(char): if my_str[0] != "-": diff --git a/tests/obs_test.py b/tests/obs_test.py index d5d8d5e1..a77c6863 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -1274,3 +1274,11 @@ def test_format(): assert o1.__format__("+3") == '+0.3480(123)' assert o1.__format__("+2") == '+0.348(12)' assert o1.__format__(" 2") == ' 0.348(12)' + +def test_f_string_obs(): + o1 = pe.pseudo_Obs(0.348, 0.0123, "test") + print(f"{o1}") + print(f"{o1:3}") + print(f"{o1:+3}") + print(f"{o1:-1}") + print(f"{o1: 8}") From ff2d4c23e8ff270445380f54678f410e6bb60caf Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 1 Jun 2023 13:32:59 +0100 Subject: [PATCH 8/9] docs: CHANGELOG updated. --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed49fe9a..5ac97253 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to this project will be documented in this file. +## [2.8.1] - 2023-06-01 +### Fixed +- `input.pandas` can now deal with columns that only have `None` entries. +- Bug in f-string conversion of `Obs` fixed. +- Bug in edge case of `_compute_drho` fixed. 
+- Several numpy 1.25 deprecations fixed. + + ## [2.8.0] - 2023-05-21 ### Added - `pyerrors` can now deal with replica with different gapsizes. From 07c8a1b71da3d8e0713d1d8855bdff882e7845a8 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 1 Jun 2023 13:35:15 +0100 Subject: [PATCH 9/9] build: version bumped to 2.8.1, numpy versions >=1.25 excluded. --- pyerrors/version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyerrors/version.py b/pyerrors/version.py index 892994aa..b4066b65 100644 --- a/pyerrors/version.py +++ b/pyerrors/version.py @@ -1 +1 @@ -__version__ = "2.8.0" +__version__ = "2.8.1" diff --git a/setup.py b/setup.py index 37a56f51..2d9e2693 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup(name='pyerrors', license="MIT", packages=find_packages(), python_requires='>=3.7.0', - install_requires=['numpy>=1.21', 'autograd>=1.5', 'numdifftools>=0.9.41', 'matplotlib>=3.5', 'scipy>=1.7', 'iminuit>=2.17', 'h5py>=3.8', 'lxml>=4.9', 'python-rapidjson>=1.9', 'pandas>=1.1'], + install_requires=['numpy>=1.21,<1.25', 'autograd>=1.5', 'numdifftools>=0.9.41', 'matplotlib>=3.5', 'scipy>=1.7', 'iminuit>=2.17', 'h5py>=3.8', 'lxml>=4.9', 'python-rapidjson>=1.9', 'pandas>=1.1'], extras_require={'test': ['pytest', 'pytest-cov', 'pytest-benchmark', 'hypothesis']}, classifiers=[ 'Development Status :: 5 - Production/Stable',