From e97cc519a9da6ce5cd4edd15fc13b8ebdf466758 Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann <82444481+jkuhl-uni@users.noreply.github.com>
Date: Mon, 22 May 2023 13:37:46 +0200
Subject: [PATCH] taking care of cols with only None values (#184)

---
 pyerrors/input/pandas.py | 30 +++++++++++++++++-------------
 tests/pandas_test.py     | 16 ++++++++++++++++
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
index 911c14d5..13482983 100644
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -171,26 +171,30 @@ def _deserialize_df(df, auto_gamma=False):
         if isinstance(df[column][0], bytes):
             if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
                 df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
-        df = df.replace({r'^$': None}, regex=True)
-        i = 0
-        while df[column][i] is None:
-            i += 1
-        if isinstance(df[column][i], str):
-            if '"program":' in df[column][i][:20]:
-                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
-                if auto_gamma is True:
-                    if isinstance(df[column][i], list):
-                        df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
-                    else:
-                        df[column].apply(lambda x: x.gm() if x is not None else x)
+
+        if not all([e is None for e in df[column]]):
+            df[column] = df[column].replace({r'^$': None}, regex=True)
+            i = 0
+            while df[column][i] is None:
+                i += 1
+            if isinstance(df[column][i], str):
+                if '"program":' in df[column][i][:20]:
+                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
+                    if auto_gamma is True:
+                        if isinstance(df[column][i], list):
+                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
+                        else:
+                            df[column].apply(lambda x: x.gm() if x is not None else x)
     return df
 
 
 def _need_to_serialize(col):
     serialize = False
     i = 0
-    while col[i] is None:
+    while i < len(col) and col[i] is None:
         i += 1
+    if i == len(col):
+        return serialize
     if isinstance(col[i], (Obs, Corr)):
         serialize = True
     elif isinstance(col[i], list):
diff --git a/tests/pandas_test.py b/tests/pandas_test.py
index fdff4f10..3a02b97e 100644
--- a/tests/pandas_test.py
+++ b/tests/pandas_test.py
@@ -173,6 +173,22 @@ def test_null_second_line_df_sql_export_import(tmp_path):
     assert np.all(reconstructed_df.loc[2:] == my_df.loc[2:])
 
 
+def test_null_col_df_gzsql_export_import(tmp_path):
+    """Round-trip a DataFrame with an all-None column via to_sql(gz=True) and read_sql."""
+    row = {"int": 1, "float": -0.01, "Noneval": None,
+           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
+           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
+    my_df = pd.DataFrame([row] * 4)
+    db_path = (tmp_path / 'test.db').as_posix()
+    pe.input.pandas.to_sql(my_df, 'test', db_path, gz=True)
+    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)
+    for name in ("int", "float"):
+        assert np.all(reconstructed_df[name] == my_df[name])
+    assert np.all([e is None for e in reconstructed_df["Noneval"]])
+    for name in ("Obs1", "Obs2"):
+        assert np.all(reconstructed_df[name] == my_df[name])
+
+
 def test_df_Corr(tmp_path):
 
     my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])