From 370eb1c148482938001865caf3b84ded78a927de Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 29 Mar 2026 18:30:08 +0200 Subject: [PATCH] [Fix] Avoid skipping NA-to-None conversion and guard auto_gamma against None lists - Replace continue with conditional to preserve NA-to-None conversion for all-null columns - Guard auto_gamma list lambda against None values to prevent TypeError - Add tests for all-empty-string columns and Obs lists with None + auto_gamma --- pyerrors/input/pandas.py | 6 ++---- tests/pandas_test.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py index 518e7203..ac20b4eb 100644 --- a/pyerrors/input/pandas.py +++ b/pyerrors/input/pandas.py @@ -180,14 +180,12 @@ def _deserialize_df(df, auto_gamma=False): i = 0 while i < len(df[column]) and pd.isna(df[column].iloc[i]): i += 1 - if i == len(df[column]): - continue - if isinstance(df[column].iloc[i], str): + if i < len(df[column]) and isinstance(df[column].iloc[i], str): if '"program":' in df[column].iloc[i][:20]: df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if not pd.isna(x) else None) if auto_gamma is True: if isinstance(df[column].iloc[i], list): - df[column].apply(lambda x: [o.gm() if o is not None else x for o in x]) + df[column].apply(lambda x: [o.gm() if o is not None else x for o in x] if x is not None else x) else: df[column].apply(lambda x: x.gm() if x is not None else x) # Convert NA values back to Python None for compatibility with `x is None` checks diff --git a/tests/pandas_test.py b/tests/pandas_test.py index e1f0b5f1..c1974ce3 100644 --- a/tests/pandas_test.py +++ b/tests/pandas_test.py @@ -293,6 +293,26 @@ def test_empty_df_deserialize(): assert len(result) == 0 +def test_all_empty_string_column(): + df = pd.DataFrame({"empty_str": ["", "", "", ""], + "val": [1, 2, 3, 4]}) + result = pe.input.pandas._deserialize_df(df) + assert all(result.loc[i, "empty_str"] is None for i in range(4)) + + +def test_Obs_list_with_none_auto_gamma(tmp_path): + obs_list = [pe.pseudo_Obs(0.0, 0.1, "test_ensemble2"), pe.pseudo_Obs(3.2, 1.1, "test_ensemble2")] + my_df = pd.DataFrame({"int": [1, 1, 1], + "Obs1": [pe.pseudo_Obs(17, 11, "test_ensemble")] * 3, + "Obs_list": [obs_list, None, obs_list]}) + for gz in [True, False]: + pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix(), gz=gz) + re_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True, gz=gz) + assert re_df.loc[1, "Obs_list"] is None + assert len(re_df.loc[0, "Obs_list"]) == 2 + assert np.all(re_df["Obs1"] == my_df["Obs1"]) + + def test_Obs_list_sql(tmp_path): my_dict = {"int": 1, "Obs1": pe.pseudo_Obs(17, 11, "test_sql_if_exists_failnsemble"),