From e1a760a058dd379b929a4623226b82b52516715f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 12:04:19 +0100 Subject: [PATCH 01/14] refactor: refactored pandas serialize and deserialize functions. --- pyerrors/input/pandas.py | 47 ++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index caf3e0b6..699392a0 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -22,10 +22,7 @@ def dump_df(df, fname, gz=True): If True, the output is a gzipped csv file. If False, the output is a csv file. """ - out = df.copy() - for column in out: - if isinstance(out[column][0], (Obs, Corr)): - out[column] = out[column].transform(lambda x: create_json_string(x, indent=0)) + out = serialize_df(df) if not fname.endswith('.csv'): fname += '.csv' @@ -65,11 +62,39 @@ def load_df(fname, auto_gamma=False, gz=True): warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) re_import = pd.read_csv(fname) - for column in re_import.select_dtypes(include="object"): - if isinstance(re_import[column][0], str): - if re_import[column][0][:20] == '{"program":"pyerrors': - re_import[column] = re_import[column].transform(lambda x: import_json_string(x, verbose=False)) - if auto_gamma is True: - re_import[column].apply(lambda x: x.gamma_method()) + return deserialize_df(re_import, auto_gamma) - return re_import + +def serialize_df(df): + """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame to be serilized. 
+ """ + out = df.copy() + for column in out: + if isinstance(out[column][0], (Obs, Corr)): + out[column] = out[column].transform(lambda x: create_json_string(x, indent=0)) + return out + + +def deserialize_df(df, auto_gamma=False): + """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame to be deserialized. + auto_gamma : bool + If True applies the gamma_method to all imported Obs objects with the default parameters for + the error analysis. Default False. + """ + for column in df.select_dtypes(include="object"): + if isinstance(df[column][0], str): + if df[column][0][:20] == '{"program":"pyerrors': + df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False)) + if auto_gamma is True: + df[column].apply(lambda x: x.gamma_method()) + return df From a1330301145808f3274634bfe360a3b8d9b1a61a Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 13:17:57 +0100 Subject: [PATCH 02/14] tests: additional tests for pandas csv import and export added. 
--- tests/pandas_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 658f4375..976a4bf1 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -28,3 +28,17 @@ def test_df_Corr(tmp_path): pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix()) reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True) + + +def test_default_export_pe_import(tmp_path): + df = pd.DataFrame([{"Column1": 1.1, "Column2": 2, "Column3": "my string£"}]) + df.to_csv((tmp_path / 'plain_df.csv').as_posix(), index=False) + re_df = pe.input.pandas.load_df((tmp_path / 'plain_df').as_posix(), gz=False) + assert np.all(df == re_df) + + +def test_pe_export_default_import(tmp_path): + df = pd.DataFrame([{"Column1": 1.1, "Column2": 2, "Column3": "my string£"}]) + pe.input.pandas.dump_df(df, (tmp_path / 'pe_df').as_posix(), gz=False) + re_df = pd.read_csv((tmp_path / 'pe_df.csv').as_posix()) + assert np.all(df == re_df) From cc9f47c6860b9c31127921975d6c69da38e41733 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 14:19:30 +0100 Subject: [PATCH 03/14] feat: pandas DataFrame serialization and deserialization can now also deal with gzipped json columns. Tests added. --- pyerrors/input/pandas.py | 17 ++++++++++++++--- tests/pandas_test.py | 9 +++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 699392a0..24296781 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -22,7 +22,7 @@ def dump_df(df, fname, gz=True): If True, the output is a gzipped csv file. If False, the output is a csv file. """ - out = serialize_df(df) + out = serialize_df(df, gz=False) if not fname.endswith('.csv'): fname += '.csv' @@ -62,21 +62,25 @@ def load_df(fname, auto_gamma=False, gz=True): warnings.warn("Trying to read from %s without unzipping!" 
% fname, UserWarning) re_import = pd.read_csv(fname) - return deserialize_df(re_import, auto_gamma) + return deserialize_df(re_import, auto_gamma=auto_gamma) -def serialize_df(df): +def serialize_df(df, gz=False): """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification. Parameters ---------- df : pandas.DataFrame DataFrame to be serilized. + gz: bool + gzip the json string represenation. Default False. """ out = df.copy() for column in out: if isinstance(out[column][0], (Obs, Corr)): out[column] = out[column].transform(lambda x: create_json_string(x, indent=0)) + if gz is True: + out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8'))) return out @@ -90,8 +94,15 @@ def deserialize_df(df, auto_gamma=False): auto_gamma : bool If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False. + + Notes: + ------ + In case any column of the DataFrame is gzipped it is gunzipped in the process. 
""" for column in df.select_dtypes(include="object"): + if isinstance(df[column][0], bytes): + if df[column][0].startswith(b"\x1f\x8b\x08\x00"): + df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8')) if isinstance(df[column][0], str): if df[column][0][:20] == '{"program":"pyerrors': df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False)) diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 976a4bf1..dec98456 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -42,3 +42,12 @@ def test_pe_export_default_import(tmp_path): pe.input.pandas.dump_df(df, (tmp_path / 'pe_df').as_posix(), gz=False) re_df = pd.read_csv((tmp_path / 'pe_df.csv').as_posix()) assert np.all(df == re_df) + + +def test_gz_serialization(): + my_obs = pe.pseudo_Obs(0.1, 0.01, "pandas DataFrame ensemble only for test purposes.") + my_df = pd.DataFrame([{"Label": 1, "Obs": my_obs}]) + for gz in [False, True]: + ser = pe.input.pandas.serialize_df(my_df, gz=gz) + deser = pe.input.pandas.deserialize_df(ser) + np.all(my_df == deser) From dc45894b644a3e85fa81d1de1b11b14fadce13dd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 14:47:01 +0100 Subject: [PATCH 04/14] fix: json format detection in deserialization of pandas dataframe made more general and less dependant on the json implementation. 
--- pyerrors/input/pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 24296781..867554ae 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -104,7 +104,7 @@ def deserialize_df(df, auto_gamma=False): if df[column][0].startswith(b"\x1f\x8b\x08\x00"): df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8')) if isinstance(df[column][0], str): - if df[column][0][:20] == '{"program":"pyerrors': + if '"program":' in df[column][0][:20]: df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False)) if auto_gamma is True: df[column].apply(lambda x: x.gamma_method()) From 265bad3ed8da76b0f91d50b0157d94d19046e101 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 14:56:59 +0100 Subject: [PATCH 05/14] feat: sqlite functionality added to pandas submodule. --- pyerrors/input/pandas.py | 43 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 867554ae..388ca680 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -6,6 +6,47 @@ from ..correlators import Corr from .json import create_json_string, import_json_string +def to_sql(df, table_name, db, if_exists="replace", gz=True): + """Write DataFrame inlcuding Obs or Corr valued columns to sqlite database. + + Parameters + ---------- + df : pandas.DataFrame + Dataframe to be written to the database. + table_name : str + Name of the table in the database. + db : str + Path to the sqlite database. + if exists : str + How to behave if table already exists. Options 'fail', 'replace', 'append'. + gz : bool + If True the json strings are gzipped. 
+ """ + se_df = pe.input.pandas.serialize_df(df, gz=gz) + con = sqlite3.connect(db) + se_df.to_sql(table_name, con, if_exists=if_exists) + con.close() + + +def read_sql_query(sql, db, auto_gamma=False): + """Write DataFrame inlcuding Obs or Corr valued columns to sqlite database. + + Parameters + ---------- + sql : str + SQL query to be executed. + db : str + Path to the sqlite database. + auto_gamma : bool + If True applies the gamma_method to all imported Obs objects with the default parameters for + the error analysis. Default False. + """ + con = sqlite3.connect(db) + extract_df = pd.read_sql_query(sql, con) + con.close() + return pe.input.pandas.deserialize_df(extract_df, auto_gamma=auto_gamma) + + def dump_df(df, fname, gz=True): """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file. @@ -21,7 +62,6 @@ def dump_df(df, fname, gz=True): gz : bool If True, the output is a gzipped csv file. If False, the output is a csv file. """ - out = serialize_df(df, gz=False) if not fname.endswith('.csv'): @@ -48,7 +88,6 @@ def load_df(fname, auto_gamma=False, gz=True): gz : bool If True, assumes that data is gzipped. If False, assumes JSON file. """ - if not fname.endswith('.csv') and not fname.endswith('.gz'): fname += '.csv' From f58ef1184574012999ed986325c03f69e3d2bfd6 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 14:59:17 +0100 Subject: [PATCH 06/14] refactor: pandas serialize and deserialize removed from global namespace. --- pyerrors/input/pandas.py | 12 ++++++------ tests/pandas_test.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 388ca680..62cf5ed9 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -22,7 +22,7 @@ def to_sql(df, table_name, db, if_exists="replace", gz=True): gz : bool If True the json strings are gzipped. 
""" - se_df = pe.input.pandas.serialize_df(df, gz=gz) + se_df = _serialize_df(df, gz=gz) con = sqlite3.connect(db) se_df.to_sql(table_name, con, if_exists=if_exists) con.close() @@ -44,7 +44,7 @@ def read_sql_query(sql, db, auto_gamma=False): con = sqlite3.connect(db) extract_df = pd.read_sql_query(sql, con) con.close() - return pe.input.pandas.deserialize_df(extract_df, auto_gamma=auto_gamma) + return _deserialize_df(extract_df, auto_gamma=auto_gamma) def dump_df(df, fname, gz=True): @@ -62,7 +62,7 @@ def dump_df(df, fname, gz=True): gz : bool If True, the output is a gzipped csv file. If False, the output is a csv file. """ - out = serialize_df(df, gz=False) + out = _serialize_df(df, gz=False) if not fname.endswith('.csv'): fname += '.csv' @@ -101,10 +101,10 @@ def load_df(fname, auto_gamma=False, gz=True): warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) re_import = pd.read_csv(fname) - return deserialize_df(re_import, auto_gamma=auto_gamma) + return _deserialize_df(re_import, auto_gamma=auto_gamma) -def serialize_df(df, gz=False): +def _serialize_df(df, gz=False): """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification. Parameters @@ -123,7 +123,7 @@ def serialize_df(df, gz=False): return out -def deserialize_df(df, auto_gamma=False): +def _deserialize_df(df, auto_gamma=False): """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification. 
Parameters diff --git a/tests/pandas_test.py b/tests/pandas_test.py index dec98456..059010c9 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -48,6 +48,6 @@ def test_gz_serialization(): my_obs = pe.pseudo_Obs(0.1, 0.01, "pandas DataFrame ensemble only for test purposes.") my_df = pd.DataFrame([{"Label": 1, "Obs": my_obs}]) for gz in [False, True]: - ser = pe.input.pandas.serialize_df(my_df, gz=gz) - deser = pe.input.pandas.deserialize_df(ser) + ser = pe.input.pandas._serialize_df(my_df, gz=gz) + deser = pe.input.pandas._deserialize_df(ser) np.all(my_df == deser) From 54789f7fe617a04ce93162af552b5a782f3866e1 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 16:54:51 +0100 Subject: [PATCH 07/14] build: sqlite3 added as requirement. --- pyerrors/input/pandas.py | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 62cf5ed9..9c32b3ba 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -1,5 +1,6 @@ import warnings import gzip +import sqlite3 import pandas as pd from ..obs import Obs from ..correlators import Corr diff --git a/setup.py b/setup.py index 33bde5bc..739acaeb 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup(name='pyerrors', license="MIT", packages=find_packages(), python_requires='>=3.6.0', - install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1'], + install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1', 'sqlite3>=0.4'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Science/Research', From 504a8f34869cc20d899e02e5e90a730f99f79900 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 16:59:24 +0100 Subject: [PATCH 08/14] tests: 
test_import_non_json_string added. --- tests/json_io_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/json_io_test.py b/tests/json_io_test.py index 5474c8ce..b161ea5c 100644 --- a/tests/json_io_test.py +++ b/tests/json_io_test.py @@ -1,5 +1,6 @@ import os import gzip +import rapidjson import numpy as np import pyerrors as pe import pyerrors.input.json as jsonio @@ -378,6 +379,11 @@ def test_reconstruct_non_linear_r_obs_list(tmp_path): assert assert_equal_Obs(oa, ob) +def test_import_non_json_string(): + with pytest.raises(rapidjson.JSONDecodeError): + pe.input.json.import_json_string("this is garbage") + + def assert_equal_Obs(to, ro): for kw in ["N", "cov_names", "covobs", "ddvalue", "dvalue", "e_content", "e_names", "idl", "mc_names", "names", From 845b4e60ac95bce86d150deb4f61103a39c0505f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 17:08:20 +0100 Subject: [PATCH 09/14] fix: pandas.to_sql does not write an index to the db anymore, docs extended, test added. --- pyerrors/input/pandas.py | 6 +++--- tests/pandas_test.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 9c32b3ba..1488dac0 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -8,7 +8,7 @@ from .json import create_json_string, import_json_string def to_sql(df, table_name, db, if_exists="replace", gz=True): - """Write DataFrame inlcuding Obs or Corr valued columns to sqlite database. + """Write DataFrame including Obs or Corr valued columns to sqlite database. 
Parameters ---------- @@ -25,12 +25,12 @@ def to_sql(df, table_name, db, if_exists="replace", gz=True): """ se_df = _serialize_df(df, gz=gz) con = sqlite3.connect(db) - se_df.to_sql(table_name, con, if_exists=if_exists) + se_df.to_sql(table_name, con, if_exists=if_exists, index=False) con.close() def read_sql_query(sql, db, auto_gamma=False): - """Write DataFrame inlcuding Obs or Corr valued columns to sqlite database. + """Execute SQL query on sqlite database and obatin DataFrame including Obs or Corr valued columns. Parameters ---------- diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 059010c9..4d34d4e0 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -51,3 +51,13 @@ def test_gz_serialization(): ser = pe.input.pandas._serialize_df(my_df, gz=gz) deser = pe.input.pandas._deserialize_df(ser) np.all(my_df == deser) + + +def test_sql(tmp_path): + my_list = [{"Label": i, "Obs": pe.pseudo_Obs(5 * np.exp(-0.2 * i), 0.01, "test_ensemble", 20)} for i in range(150)] + pe_df = pd.DataFrame(my_list) + my_db = (tmp_path / "test_db.sqlite").as_posix() + pe.input.pandas.to_sql(pe_df, "My_table", my_db) + for auto_gamma in [False, True]: + re_df = pe.input.pandas.read_sql_query("SELECT * from My_table", my_db, auto_gamma=auto_gamma) + assert np.all(re_df == pe_df) From 03d70d3757a68510626770e453c0d5b9e52230ea Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 17:09:53 +0100 Subject: [PATCH 10/14] tests: missing assert added to pandas test. 
--- tests/pandas_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 4d34d4e0..29d8df86 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -50,11 +50,11 @@ def test_gz_serialization(): for gz in [False, True]: ser = pe.input.pandas._serialize_df(my_df, gz=gz) deser = pe.input.pandas._deserialize_df(ser) - np.all(my_df == deser) + assert np.all(my_df == deser) def test_sql(tmp_path): - my_list = [{"Label": i, "Obs": pe.pseudo_Obs(5 * np.exp(-0.2 * i), 0.01, "test_ensemble", 20)} for i in range(150)] + my_list = [{"Label": i, "Obs": pe.pseudo_Obs(5 * np.exp(-0.2 * i), 0.01, "test_ensemble", 20)} for i in range(15)] pe_df = pd.DataFrame(my_list) my_db = (tmp_path / "test_db.sqlite").as_posix() pe.input.pandas.to_sql(pe_df, "My_table", my_db) From a6ebcb59bb480780cb1736545f3db5772cd11f4f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 17:19:36 +0100 Subject: [PATCH 11/14] feat: default value for if_exists in to_sql changed to fail, test for this behavior added. --- pyerrors/input/pandas.py | 6 +++--- tests/pandas_test.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 1488dac0..4b0e1d99 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -7,7 +7,7 @@ from ..correlators import Corr from .json import create_json_string, import_json_string -def to_sql(df, table_name, db, if_exists="replace", gz=True): +def to_sql(df, table_name, db, if_exists='fail', gz=True): """Write DataFrame including Obs or Corr valued columns to sqlite database. Parameters @@ -30,7 +30,7 @@ def to_sql(df, table_name, db, if_exists="replace", gz=True): def read_sql_query(sql, db, auto_gamma=False): - """Execute SQL query on sqlite database and obatin DataFrame including Obs or Corr valued columns. 
+ """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns. Parameters ---------- @@ -113,7 +113,7 @@ def _serialize_df(df, gz=False): df : pandas.DataFrame DataFrame to be serilized. gz: bool - gzip the json string represenation. Default False. + gzip the json string representation. Default False. """ out = df.copy() for column in out: diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 29d8df86..22dc74a2 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import pyerrors as pe +import pytest def test_df_export_import(tmp_path): my_dict = {"int": 1, @@ -61,3 +62,13 @@ def test_sql(tmp_path): for auto_gamma in [False, True]: re_df = pe.input.pandas.read_sql_query("SELECT * from My_table", my_db, auto_gamma=auto_gamma) assert np.all(re_df == pe_df) + + +def test_sql_if_exists_fail(tmp_path): + pe_df = pd.DataFrame([{"Label": 1, "Obs": pe.pseudo_Obs(5 * np.exp(-0.2), 0.01, "test_ensemble", 20)}]) + my_db = (tmp_path / "test_db.sqlite").as_posix() + pe.input.pandas.to_sql(pe_df, "My_table", my_db) + with pytest.raises(ValueError): + pe.input.pandas.to_sql(pe_df, "My_table", my_db) + pe.input.pandas.to_sql(pe_df, "My_table", my_db, if_exists='append') + pe.input.pandas.to_sql(pe_df, "My_table", my_db, if_exists='replace') From 1d492dfa672c99e2b1a3e384887a789ccd08a9f5 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 4 Jul 2022 17:27:36 +0100 Subject: [PATCH 12/14] build: sqlite dependency fixed. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 739acaeb..61f27767 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup(name='pyerrors', license="MIT", packages=find_packages(), python_requires='>=3.6.0', - install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1', 'sqlite3>=0.4'], + install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1', 'pysqlite3>=0.4'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Science/Research', From de51d6ca07f1a9169ada6ecc5818e0ac4e71e034 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Jul 2022 13:31:41 +0100 Subject: [PATCH 13/14] refactor: pandas.read_sql_query renamed to read_sql and can now also directly extract full tables. --- pyerrors/input/pandas.py | 4 ++-- tests/pandas_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 4b0e1d99..2aa2bfe9 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -29,7 +29,7 @@ def to_sql(df, table_name, db, if_exists='fail', gz=True): con.close() -def read_sql_query(sql, db, auto_gamma=False): +def read_sql(sql, db, auto_gamma=False): """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns. Parameters @@ -43,7 +43,7 @@ def read_sql_query(sql, db, auto_gamma=False): the error analysis. Default False. 
""" con = sqlite3.connect(db) - extract_df = pd.read_sql_query(sql, con) + extract_df = pd.read_sql(sql, con) con.close() return _deserialize_df(extract_df, auto_gamma=auto_gamma) diff --git a/tests/pandas_test.py b/tests/pandas_test.py index 22dc74a2..71961d38 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -60,7 +60,7 @@ def test_sql(tmp_path): my_db = (tmp_path / "test_db.sqlite").as_posix() pe.input.pandas.to_sql(pe_df, "My_table", my_db) for auto_gamma in [False, True]: - re_df = pe.input.pandas.read_sql_query("SELECT * from My_table", my_db, auto_gamma=auto_gamma) + re_df = pe.input.pandas.read_sql("SELECT * from My_table", my_db, auto_gamma=auto_gamma) assert np.all(re_df == pe_df) From 18b3672b2d8fe8f73e0ffe67023ce552c7b8db2e Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 6 Jul 2022 11:34:26 +0100 Subject: [PATCH 14/14] feat: kwargs added to pandas sql methods --- pyerrors/input/pandas.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 2aa2bfe9..53e6efd0 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -7,7 +7,7 @@ from ..correlators import Corr from .json import create_json_string, import_json_string -def to_sql(df, table_name, db, if_exists='fail', gz=True): +def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs): """Write DataFrame including Obs or Corr valued columns to sqlite database. Parameters @@ -25,11 +25,11 @@ def to_sql(df, table_name, db, if_exists='fail', gz=True): """ se_df = _serialize_df(df, gz=gz) con = sqlite3.connect(db) - se_df.to_sql(table_name, con, if_exists=if_exists, index=False) + se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs) con.close() -def read_sql(sql, db, auto_gamma=False): +def read_sql(sql, db, auto_gamma=False, **kwargs): """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns. 
Parameters @@ -43,7 +43,7 @@ def read_sql(sql, db, auto_gamma=False): the error analysis. Default False. """ con = sqlite3.connect(db) - extract_df = pd.read_sql(sql, con) + extract_df = pd.read_sql(sql, con, **kwargs) con.close() return _deserialize_df(extract_df, auto_gamma=auto_gamma)