From e97cc519a9da6ce5cd4edd15fc13b8ebdf466758 Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann <82444481+jkuhl-uni@users.noreply.github.com>
Date: Mon, 22 May 2023 13:37:46 +0200
Subject: [PATCH] taking care of cols with only None values (#184)

_deserialize_df: only run the empty-string replacement and the search for
the first non-None entry on columns that contain at least one non-None
entry, and apply the replacement to the column instead of the whole frame.

_need_to_serialize: stop scanning at the end of the column and return
False when every entry is None instead of indexing past the end.

Add a round-trip test (to_sql with gz=True, then read_sql) for a frame
that contains an all-None column.

---
 pyerrors/input/pandas.py | 30 +++++++++++++++++-------------
 tests/pandas_test.py     | 16 ++++++++++++++++
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
index 911c14d5..13482983 100644
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False):
         if isinstance(df[column][0], bytes):
             if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
                 df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
-        df = df.replace({r'^$': None}, regex=True)
-        i = 0
-        while df[column][i] is None:
-            i += 1
-        if isinstance(df[column][i], str):
-            if '"program":' in df[column][i][:20]:
-                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
-                if auto_gamma is True:
-                    if isinstance(df[column][i], list):
-                        df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
-                    else:
-                        df[column].apply(lambda x: x.gm() if x is not None else x)
+
+        if not all([e is None for e in df[column]]):
+            df[column] = df[column].replace({r'^$': None}, regex=True)
+            i = 0
+            while df[column][i] is None:
+                i += 1
+            if isinstance(df[column][i], str):
+                if '"program":' in df[column][i][:20]:
+                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
+                    if auto_gamma is True:
+                        if isinstance(df[column][i], list):
+                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
+                        else:
+                            df[column].apply(lambda x: x.gm() if x is not None else x)
     return df
 
 
 def _need_to_serialize(col):
     serialize = False
     i = 0
-    while col[i] is None:
+    while i < len(col) and col[i] is None:
         i += 1
+    if i == len(col):
+        return serialize
     if isinstance(col[i], (Obs, Corr)):
         serialize = True
     elif isinstance(col[i], list):
diff --git a/tests/pandas_test.py b/tests/pandas_test.py
index fdff4f10..3a02b97e 100644
--- a/tests/pandas_test.py
+++ b/tests/pandas_test.py
@@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path):
     assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
 
 
+def test_null_col_df_gzsql_export_import(tmp_path):
+    my_dict = {"int": 1,
+               "float": -0.01,
+               "Noneval": None,
+               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
+               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
+    my_df = pd.DataFrame([my_dict] * 4)
+    pe.input.pandas.to_sql(my_df, 'test', (tmp_path / 'test.db').as_posix(), gz=True)
+    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', (tmp_path / 'test.db').as_posix(), auto_gamma=True)
+    assert np.all(reconstructed_df["int"] == my_df["int"])
+    assert np.all(reconstructed_df["float"] == my_df["float"])
+    assert np.all([e is None for e in reconstructed_df["Noneval"]])
+    assert np.all(reconstructed_df["Obs1"] == my_df["Obs1"])
+    assert np.all(reconstructed_df["Obs2"] == my_df["Obs2"])
+
+
 def test_df_Corr(tmp_path):
     my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"),
                        pe.pseudo_Obs(-0.154, 0.03, "test")])